### **1. Visualize the normal and tumor training images**

In [27]:
import numpy as np
import matplotlib.pyplot as plt
from openslide import OpenSlide
from PIL import Image
import matplotlib.pyplot as plt
import os

def show_wsi_thumbnail(wsi_path, thumbnail_size=(2048, 2048), title=""):
    """
    Display a downsampled thumbnail of a whole-slide image (WSI).

    Parameters:
        wsi_path (str): Path to the slide file readable by OpenSlide.
        thumbnail_size (tuple): Maximum (width, height) of the thumbnail.
        title (str): Title shown above the figure.

    Raises:
        FileNotFoundError: If `wsi_path` does not point to an existing file.
    """
    # OpenSlide reports a missing file and an unreadable format with the same
    # message, so check for existence first to make path mistakes obvious.
    if not os.path.isfile(wsi_path):
        raise FileNotFoundError(f"WSI file not found: {os.path.abspath(wsi_path)}")

    # The context manager closes the slide handle once the thumbnail is read;
    # the returned PIL image stays usable afterwards.
    with OpenSlide(wsi_path) as slide:
        thumbnail = slide.get_thumbnail(thumbnail_size)

    plt.figure(figsize=(12, 12))
    plt.imshow(thumbnail)
    plt.title(title)
    plt.axis("off")
    plt.show()

# Training slides are located relative to the notebook's working directory.
normal_path = os.path.join(os.getcwd(), "..", "data", "camelyon16", "train", "img", "normal_001.tif")
print("Normal WSI path:", normal_path)
tumor_path = os.path.join(os.getcwd(), "..", "data", "camelyon16", "train", "img", "tumor_001.tif")
show_wsi_thumbnail(normal_path, title="Normal WSI")
show_wsi_thumbnail(tumor_path, title="Tumor WSI")

# Save thumbnails as PNG files.
# Each slide is opened in a `with` block so its handle is closed as soon as
# the thumbnail has been written instead of staying open in the kernel.
with OpenSlide(normal_path) as normal_slide:
    normal_slide.get_thumbnail((2048, 2048)).save("normal_thumbnail.png")
with OpenSlide(tumor_path) as tumor_slide:
    tumor_slide.get_thumbnail((2048, 2048)).save("tumor_thumbnail.png")



Normal WSI path: c:\Users\anaca\Documents\sexto.curso\tfg info\fresh-clone\ss25_Hierarchical_Multiscale_Image_Classification\src\..\data\camelyon16\train\img\normal_001.tif


OpenSlideUnsupportedFormatError: Unsupported or missing image file

OpenSlideUnsupportedFormatError: Unsupported or missing image file

In [None]:
import zipfile

def extract_zip(zip_path, extract_to):
    """
    Extract a ZIP archive into a destination directory.

    Extraction is skipped when the destination already exists and contains
    at least one entry. An empty destination (for example one left behind by
    an earlier failed run) is populated.

    Parameters:
        zip_path (str): Path to the ZIP archive.
        extract_to (str): Directory that receives the archive contents.

    Raises:
        FileNotFoundError: If `zip_path` does not exist.
        zipfile.BadZipFile: If `zip_path` is not a valid ZIP archive.
    """
    if os.path.isdir(extract_to) and os.listdir(extract_to):
        print(f"[INFO] Directory {extract_to} already exists. Skipping extraction.")
        return
    # Open the archive before creating the destination so that a missing or
    # corrupt archive does not leave an empty directory that would make every
    # later call skip extraction.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        os.makedirs(extract_to, exist_ok=True)
        zip_ref.extractall(extract_to)
    print(f"[INFO] Extracted {zip_path} to {extract_to}")

# Both the archive and its extraction target live in the same masks folder,
# resolved relative to the current working directory.
zip_path, extract_to = (
    os.path.join(os.getcwd(), "..", "data", "camelyon16", "masks", leaf)
    for leaf in ("lesion_annotations.zip", "annotations")
)
extract_zip(zip_path=zip_path, extract_to=extract_to)


[INFO] Directory c:\Users\anaca\Documents\sexto.curso\tfg info\fresh-clone\ss25_Hierarchical_Multiscale_Image_Classification\src\..\data\camelyon16\masks\annotations already exists. Skipping extraction.


### **2. Parsing the XML mask**

In [None]:
import xml.etree.ElementTree as ET

def parse_annotation(xml_path):
    """
    Parse the XML annotation file and extract regions of tumor.

    Two layouts are understood for each <Annotation> element:
      * <Region> elements containing <Vertex X=".." Y=".."/> children;
        one polygon is produced per <Region>.
      * <Coordinate Order=".." X=".." Y=".."/> children (no <Region>);
        one polygon is produced per <Annotation>, with points sorted by
        their `Order` attribute.

    Parameters:
        xml_path (str): Path to the XML annotation file.

    Returns:
        list[list[tuple[float, float]]]: One list of (x, y) points per polygon,
        in the coordinate system stored in the file.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()

    regions = []
    for annotation in root.iter('Annotation'):
        region_nodes = list(annotation.iter('Region'))
        if region_nodes:
            for region in region_nodes:
                regions.append(
                    [(float(vertex.get('X')), float(vertex.get('Y')))
                     for vertex in region.iter('Vertex')]
                )
            continue

        # NOTE(review): CAMELYON16 lesion annotations appear to use this
        # <Coordinates>/<Coordinate> layout rather than <Region>/<Vertex>;
        # without this branch such files would yield no polygons.
        # NOTE(review): all annotations are returned regardless of their
        # group; confirm whether some groups should be excluded.
        coordinates = list(annotation.iter('Coordinate'))
        if coordinates:
            # Fall back to document position when `Order` is absent.
            ordered = sorted(
                enumerate(coordinates),
                key=lambda item: float(item[1].get('Order', item[0])),
            )
            regions.append(
                [(float(coord.get('X')), float(coord.get('Y')))
                 for _, coord in ordered]
            )
    return regions


### **3. Overlaying the mask on the WSI**

In [None]:
import matplotlib.pyplot as plt
from openslide import OpenSlide
import numpy as np
from PIL import ImageDraw

def show_mask_overlay(wsi_path, xml_path, level=4):
    """
    Show a slide thumbnail with the annotated polygons drawn on top.

    Parameters:
        wsi_path (str): Path to the whole-slide image.
        xml_path (str): Path to the XML annotation file for that slide.
        level (int): Pyramid level whose dimensions set the thumbnail size.
            Values beyond the last available level use the last level.

    Raises:
        FileNotFoundError: If `wsi_path` or `xml_path` is not an existing file.
    """
    for required in (wsi_path, xml_path):
        if not os.path.isfile(required):
            raise FileNotFoundError(f"File not found: {os.path.abspath(required)}")

    # Read everything needed from the slide, then release the handle.
    with OpenSlide(wsi_path) as slide:
        # Slides with fewer pyramid levels than requested fall back to the
        # coarsest one instead of raising IndexError.
        level = min(level, slide.level_count - 1)
        full_width, full_height = slide.level_dimensions[0]
        thumb = slide.get_thumbnail(slide.level_dimensions[level])

    # Pillow only alpha-blends RGBA ink when the target image is RGB; on an
    # RGBA image the fill would overwrite the tissue pixels instead.
    thumb = thumb.convert("RGB")
    draw = ImageDraw.Draw(thumb, "RGBA")

    regions = parse_annotation(xml_path)
    # Scale from level-0 coordinates to the actual thumbnail size, per axis,
    # since the thumbnail may not match the level dimensions exactly.
    scale_x = thumb.width / full_width
    scale_y = thumb.height / full_height

    for polygon in regions:
        if len(polygon) < 2:
            # Nothing drawable; skip rather than let Pillow reject it.
            continue
        downscaled = [(x * scale_x, y * scale_y) for x, y in polygon]
        draw.polygon(downscaled, outline=(255, 0, 0, 255), fill=(255, 0, 0, 80))

    plt.figure(figsize=(10, 10))
    plt.imshow(thumb)
    plt.title(f"Tumor Regions Overlay for {wsi_path}")
    plt.axis("off")
    plt.show()

# Resolve the data folder the same way as the earlier cells (one level above
# the working directory); "./data/..." points to a different location.
wsi_path = os.path.join(os.getcwd(), "..", "data", "camelyon16", "train", "img", "tumor_001.tif")
xml_path = os.path.join(os.getcwd(), "..", "data", "camelyon16", "masks", "annotations", "tumor_001.xml")
show_mask_overlay(wsi_path, xml_path)


## **Convert mask and images to tensor**