In [1]:
import os
import pickle
import openslide as ops
import numpy as np
import logging
import tensorflow as tf
import keras
from huggingface_hub import from_pretrained_keras

2025-02-15 10:53:23.727716: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
WSI_DIR = "./wsis"
MODEL_NAME = "/home/cilem/.cache/huggingface/hub/models--google--path-foundation/snapshots/fd6a835ceaae15be80db6abd8dcfeb86a9287e72"
PATCH_DIR = "./embeddings"
LOG_NAME = "embedding_extractor.log"
PATCH_SIZE = 512
OVERLAP = 0

In [3]:
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 
                    filename="./logs/{}".format(LOG_NAME))

logging.info("Starting patch extraction...")
if not os.path.exists(PATCH_DIR):
    os.makedirs(PATCH_DIR)

logging.info("Extracting patches from WSI...")
logging.info("Patch size: {}".format(PATCH_SIZE))
logging.info("Overlap: {}".format(OVERLAP))
logging.info("WSI directory: {}".format(WSI_DIR))
logging.info("Patch directory: {}".format(PATCH_DIR))

In [4]:
class PatchEmbeddingExtractor:
    def __init__(self, slide_root_path, model_name, patch_size, overlap):
        self.model = keras.layers.TFSMLayer(model_name, call_endpoint='serving_default')
        #self.infer = self.model.signatures["serving_default"]
        self.infer = self.model
        

        self.slides_path = []
        for root, dirs, files in os.walk(slide_root_path):
            for file in files:
                if file.endswith((".svs", ".tiff", ".tif")):
                    self.slide_path = os.path.join(root, file)
                    self.slides_path.append(self.slide_path)
        
        self.patch_size = patch_size
        self.overlap = overlap

    def __len__(self):
        return len(self.embeddings)
    
    def extract_patch_embeddings(self):
        self.embeddings = []
        for slide_path in self.slides_path:
            try:
                slide = ops.OpenSlide(slide_path)
                slide_name = os.path.basename(slide_path)
                slide_width, slide_height = slide.dimensions
                patch_width, patch_height = self.patch_size
                overlap_width, overlap_height = self.overlap

                for y in range(0, slide_height, patch_height-overlap_height):
                    for x in range(0, slide_width, patch_width-overlap_width):
                        patch = slide.read_region(location=(x, y), level=0, size=self.patch_size)
                        if patch.size < self.patch_size:
                            continue
                        else:
                            patch_ = patch.convert("RGB")
                            patch = patch_.resize((224, 224))
                            patch = np.array(patch)
                            img = tf.cast(patch, tf.float32) / 255.0
                            img = tf.expand_dims(img, 0)
                            embedding = self.infer(tf.constant(img))["output_0"].numpy()
                            self.embeddings.append({
                                "slide_name": slide_name,
                                "x": x,
                                "y": y,
                                "level": 0,
                                "patch_size": self.patch_size,
                                "resize": (224, 224),
                                "embedding_vector": embedding
                            })
                            logging.info(f"Extracted patch embedding from {slide_path} at ({x}, {y})")
            except Exception as e:
                logging.error(f"Error extracting patch embeddings from {slide_path}: {e}")

        return self.embeddings

In [5]:
extractor = PatchEmbeddingExtractor(slide_root_path=WSI_DIR, 
                                    model_name= MODEL_NAME,
                                    patch_size=(PATCH_SIZE, PATCH_SIZE), 
                                    overlap=(OVERLAP, OVERLAP))

embeddings = extractor.extract_patch_embeddings()
logging.info("Number of extracted patches: {}".format(len(embeddings)))

I0000 00:00:1739606018.475251  990006 service.cc:148] XLA service 0x7fe24c00b630 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1739606018.475310  990006 service.cc:156]   StreamExecutor device (0): Host, Default Version
2025-02-15 10:53:38.645729: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1739606022.838840  990006 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In [6]:
print("Number of extracted patches: {}".format(len(embeddings)))

Number of extracted patches: 890163


In [7]:
logging.info("Saving embeddings...")
with open(os.path.join(PATCH_DIR, f"embeddings_{PATCH_SIZE}.pkl"), "wb") as f:
    pickle.dump(embeddings, f)

In [None]:
import pickle
import os
dataset = pickle.load(open(os.path.join(PATCH_DIR, f"embeddings_{PATCH_SIZE}.pkl"), "rb"))
print(len(dataset))

In [None]:
import huggingface_hub
import tensorflow
import keras
print(huggingface_hub.__version__)
print(tensorflow.__version__)
print(keras.__version__)

In [None]:
from PIL import Image as PILImage
from IPython.display import display
from huggingface_hub import from_pretrained_keras
img = PILImage.open("Test.png").crop((0, 0, 224, 224)).convert('RGB')
display(img)

In [None]:
# @title Compute Embeddings
from huggingface_hub import from_pretrained_keras
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import numpy as np

# Convert the image to a Tensor and scale to [0, 1]
tensor = tf.cast(tf.expand_dims(np.array(img), axis=0), tf.float32) / 255.0
print("Tensor shape:", type(tensor), tensor.shape)
# Load the model directly from Hugging Face
loaded_model = keras.layers.TFSMLayer("/home/cilem/.cache/huggingface/hub/models--google--path-foundation/snapshots/fd6a835ceaae15be80db6abd8dcfeb86a9287e72", call_endpoint='serving_default')

# Call inference
infer = loaded_model
embeddings = infer(tf.constant(tensor))

# Extract the embedding vector
embedding_vector = embeddings['output_0'].numpy()
print("Size of embedding vector:", embedding_vector.shape)

# Plot the embedding vector
plt.figure(figsize=(12, 4))
plt.plot(embedding_vector)
plt.title('Embedding Vector')
plt.xlabel('Index')
plt.ylabel('Value')
plt.grid(True)
plt.show()