In [None]:
import os
import numpy as np
from PIL import Image, ImageDraw
from lxml import etree
import matplotlib.pyplot as plt
os.add_dll_directory(
    r"C:\Program Files\OpenSlide\openslide-bin-4.0.0.8-windows-x64\bin"
)
from openslide import OpenSlide

class bcolors:
    """ANSI escape sequences used to colorize console messages.

    Wrap a message as ``f"{bcolors.ERROR}text{bcolors.ENDC}"``; ``ENDC``
    resets the terminal back to its default attributes.
    """

    # Bright foreground colors.
    HEADER = "\033[95m"     # bright magenta
    OKBLUE = "\033[94m"     # bright blue
    DEBUG = "\033[96m"      # bright cyan
    INFO = "\033[92m"       # bright green
    WARNING = "\033[93m"    # bright yellow
    ERROR = "\033[91m"      # bright red

    # Text attributes.
    ENDC = "\033[0m"        # reset all attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

# Show the working directory: every data path below is resolved relative to it.
print(f"Working directory: {os.getcwd()}")

def parse_xml_mask(xml_path, level_dims, slide, level):
    """
    Convert XML annotation to binary mask.

    Every ``Coordinates`` element that is a direct child of an ``Annotation``
    element is rasterized as one filled polygon. The ``X``/``Y`` attributes of
    its ``Coordinate`` children are scaled from the level-0 dimensions of
    ``slide`` to ``level_dims`` before drawing.

    Parameters:
    - xml_path: str, path to the XML file containing annotations.
    - level_dims: tuple, dimensions of the WSI at the specified level (width, height).
    - slide: OpenSlide object for the WSI; only ``slide.level_dimensions[0]``
      is read, to derive the scaling factors.
    - level: int, target level for mask. Not used by the computation (the
      scale comes from ``level_dims``); kept so existing callers keep working.

    Returns:
    - PIL image in mode "L" of size ``level_dims`` with 255 inside annotated
      polygons and 0 elsewhere, or None if the XML file could not be read or
      parsed.
    """
    try:
        tree = etree.parse(xml_path)
    except (etree.XMLSyntaxError, OSError) as e:
        # OSError covers a missing/unreadable file, which is reported the same
        # way as malformed XML instead of escaping as an uncaught exception.
        print(f"{bcolors.ERROR}[ERROR]{bcolors.ENDC} Error parsing XML file {xml_path}: {e}")
        return None

    # Compute scaling factors based on actual dimensions
    base_dims = slide.level_dimensions[0]
    scale_x = level_dims[0] / base_dims[0]
    scale_y = level_dims[1] / base_dims[1]

    mask = Image.new("L", level_dims, 0)
    draw = ImageDraw.Draw(mask)

    # "//Annotation/Coordinates" already matches the nodes reached through
    # "//Annotations/Annotation/Coordinates", so a single path is sufficient.
    for coordinates_node in tree.xpath("//Annotation/Coordinates"):
        coords = []
        for coord_node in coordinates_node.findall("Coordinate"):
            try:
                x = float(coord_node.get("X"))
                y = float(coord_node.get("Y"))
            except (ValueError, TypeError) as e:
                # TypeError: attribute missing (get() returned None);
                # ValueError: attribute present but not a number.
                print(f"{bcolors.WARNING}Warning: Could not parse coordinate (X,Y) from XML for {xml_path}: {e}{bcolors.ENDC}")
                continue
            # Scale coordinates to the target level (int() truncates toward zero)
            coords.append((int(x * scale_x), int(y * scale_y)))

        if not coords:
            continue
        if len(coords) < 2:
            # ImageDraw.polygon rejects a coordinate list with fewer than two
            # points; skip the degenerate annotation rather than abort the mask.
            print(f"{bcolors.WARNING}Warning: Skipping annotation with a single coordinate in {xml_path}{bcolors.ENDC}")
            continue
        draw.polygon(coords, outline=255, fill=255)
    return mask

# Dataset locations, all resolved relative to the current working directory.
wsi_dir = os.path.join(os.getcwd(), "..", "data", "camelyon16", "train", "img")
annot_dir_train = os.path.join(
    os.getcwd(), "..", "data", "camelyon16", "train", "mask"
)
# NOTE(review): this resolves to the same path as wsi_dir (train/img), which
# looks like a copy-paste slip for a test-annotation directory — confirm the
# intended location before relying on it.
annot_dir_test = os.path.join(
    os.getcwd(), "..", "data", "camelyon16", "train", "img"
)
level_dir = os.path.join(
    os.getcwd(), "..", "data", "camelyon16", "patches", "level_3"
)
# Print each directory together with whether it exists
print("WSI directory exists:", wsi_dir, os.path.exists(wsi_dir))
print("Annotation directory (train) exists:", annot_dir_train, os.path.exists(annot_dir_train))
print("Annotation directory (test) exists:", annot_dir_test, os.path.exists(annot_dir_test))
print("Level directory exists:", level_dir, os.path.exists(level_dir))


In [None]:
# Pyramid level used for the mask and the patch below.
# NOTE(review): level_dir is built from the literal "level_3" elsewhere in the
# file, so changing `level` here does not change which patch directory is listed.
level = 3

# List the label sub-folders found under each slide's patch directory.
for slide_name in os.listdir(level_dir):
    subdir = os.path.join(level_dir, slide_name)
    if not os.path.isdir(subdir):
        # Stray files in level_dir would make os.listdir(subdir) raise.
        continue
    labels = [f for f in os.listdir(subdir) if os.path.isdir(os.path.join(subdir, f))]
    print(f"{slide_name}: labels -> {labels}")

wsi_name = "tumor_076.tif"
wsi_path = os.path.join(wsi_dir, wsi_name)
# splitext only swaps the extension; str.replace would also rewrite a ".tif"
# occurring elsewhere in the name.
xml_path = os.path.join(annot_dir_train, os.path.splitext(wsi_name)[0] + ".xml")
print("XML path exists:", os.path.exists(xml_path))
print("WSI path ", wsi_path)
# The slide handle is intentionally left open: later cells may still use it.
slide = OpenSlide(wsi_path)
level_dims = slide.level_dimensions[level]
mask = parse_xml_mask(xml_path, level_dims, slide, level)
if mask is None:
    # parse_xml_mask reports the cause and returns None; stop here with a clear
    # message instead of failing later inside the plotting calls.
    raise RuntimeError(f"Could not build annotation mask from {xml_path}")


plt.imshow(mask)
plt.title("Parsed XML Mask")
plt.show()


# Top-left corner of the patch, expressed in level-`level` pixel coordinates.
x, y = 1000, 1500
patch_size = 224
# read_region expects the location in level-0 coordinates, so scale it up by
# the level's downsample factor; the size argument stays in level pixels.
downsample = slide.level_downsamples[level]
patch = slide.read_region(
    (int(x * downsample), int(y * downsample)), level, (patch_size, patch_size)
).convert("RGB")

# The mask was drawn at level_dims, so it is cropped with the level coordinates.
mask_patch = mask.crop((x, y, x + patch_size, y + patch_size))

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(patch)
ax[0].set_title("Image Patch")
ax[1].imshow(mask_patch, cmap="gray")
ax[1].set_title("Mask Patch")
plt.show()

