### Process tile

In [2]:
import timm
from PIL import Image
from torchvision import transforms
import torch


# Check if GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


# Older versions of timm have compatibility issues. Please ensure that you use a newer version by running the following command: pip install timm>=1.0.3.
tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True).to(device)


transform = transforms.Compose(
    [
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)


    

cuda


  return self.fget.__get__(instance, owner)()


In [11]:
img_path = "../images/01581x_25583y.png"
sample_input = transform(Image.open(img_path).convert("RGB")).unsqueeze(0).to(device)
# img_path_2 = "../images/01581x_25327y.png"
# sample_input_2 = transform(Image.open(img_path_2).convert("RGB")).unsqueeze(0).to(device)

# sample_input = torch.cat([sample_input, sample_input_2])

tile_encoder.eval()
with torch.no_grad():
    output = tile_encoder(sample_input).squeeze()
    
print(output)
    

tensor([ 0.0271, -0.1983, -0.4643,  ...,  0.5978,  0.1358,  0.6445],
       device='cuda:0')


In [None]:
tensor([[ 0.0271, -0.1983, -0.4643,  ...,  0.5978,  0.1358,  0.6445],
        [-0.6085, -0.6396,  0.2484,  ...,  0.7069, -0.1100,  0.2605]],
       device='cuda:0')

### Encode all tiles 2nd attempt

In [None]:
### slight modification of "run_inference_with_tile_encoder", 
### to be used with the tile generator instead of the data loader they use,
### which has saved tiles as input, which I hate. Just do it directly  

In [1]:
import torch
from tqdm import tqdm
from torchvision import transforms
from PIL import Image

torch.backends.cudnn.deterministic = True




@torch.no_grad()
def run_inference_with_tile_encoder(tile_encoder: torch.nn.Module, tile_dl) -> dict:
    """
    Run inference with the tile encoder

    Arguments:
    ----------
    tile_dl: tile dataloader
    tile_encoder : torch.nn.Module
        Tile encoder model
    """
    tile_encoder = tile_encoder.cuda()
    # make the tile dataloader
    # tile_dl = DataLoader(TileEncodingDataset(image_paths, transform=load_tile_encoder_transforms()), batch_size=batch_size, shuffle=False)
    # run inference
    tile_encoder.eval()
    collated_outputs = {'tile_embeds': [], 'coords': []}
    batches = []
    with torch.cuda.amp.autocast(dtype=torch.float16):
        for batch in tqdm(tile_dl, desc='Running inference with tile encoder'):
            batches.append(batch)
            collated_outputs['tile_embeds'].append(tile_encoder(batch.cuda()).detach().cpu())
            # collated_outputs['coords'].append(batch['coords'])
    # return {k: torch.cat(v) for k, v in collated_outputs.items()}
    return batches, collated_outputs

In [7]:
import h5py
with h5py.File("../tests/7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65.h5", "r") as f:
    coords = f["coords"][()]  # returns as a numpy array
    tile_embeds = f["tile_embeds"][()]  # returns as a numpy array


In [2]:
from cp_toolbox.deep_learning.torch.generators import TileGenerator
from cp_toolbox.image_processing.slide import Wsi
import h5py
import timm

### TileGenerator parameters
resolution = 20
batch_size = 20
tile_size = 256
target_size = 224  # target size of encoder, transform crops to that size in TileGenerator

filename = "7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65"
features_path = f"/main_dir/felipe/projects/CLAM/datasets/features/h5_files/{filename}.h5"
wsi_path = f"/main_dir/felipe/projects/CLAM/datasets/features/{filename}.tiff"

wsi = Wsi(wsi_path)

with h5py.File(features_path, "r") as f:
    coords = f["coords"][()]  # returns as a numpy array
coords_sample = coords[5000:5050]

transform_2 = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)

tile_gen = TileGenerator(
    image=wsi,
    tile_size=tile_size,
    target_size = target_size,
    coords=coords_sample,
    level=wsi.get_level_from_resolution(20),
    batch_size=batch_size,
    preprocessing_function=transform_2,
    cvt_color_code="rgb"
)

tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True)


Image type: <class 'cp_toolbox.image_processing.slide.Wsi'>
Filename: 7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65.tiff


  return self.fget.__get__(instance, owner)()


In [3]:
tile_gen.preprocessing_function

Compose(
    ToPILImage()
    Resize(size=256, interpolation=bicubic, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
)

In [7]:
coords_sample = coords[5000:5050]

In [8]:
tile_gen = TileGenerator(
    image=wsi,
    tile_size=tile_size,
    coords=coords_sample,
    level=wsi.get_level_from_resolution(20),
    batch_size=batch_size,
    preprocessing_function=transform_2,
    cvt_color_code="rgb"
)
asdf = run_inference_with_tile_encoder(
    tile_encoder=tile_encoder,
    tile_dl=tile_gen.data_loader()
)

Image type: <class 'cp_toolbox.image_processing.slide.Wsi'>
Filename: 7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65.tiff


Running inference with tile encoder: 100%|██████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.07it/s]


In [9]:
asdf

([tensor([[[[ 2.0092,  1.9749,  2.0092,  ...,  1.9920,  2.0092,  1.9920],
            [ 2.0092,  1.9920,  2.0092,  ...,  1.9749,  2.0092,  1.9920],
            [ 1.9920,  1.9749,  1.9920,  ...,  1.9749,  2.0092,  1.9749],
            ...,
            [ 1.9235,  1.9235,  1.9235,  ...,  2.1633,  2.1290,  2.0948],
            [ 1.9235,  1.9235,  1.9235,  ...,  2.0948,  2.0605,  2.0434],
            [ 2.0092,  2.0092,  1.9749,  ...,  1.9920,  1.9578,  1.9578]],
  
           [[ 1.8508,  1.8158,  1.8158,  ...,  1.9034,  1.9209,  1.9384],
            [ 1.9034,  1.8859,  1.9034,  ...,  1.9384,  1.9559,  1.9734],
            [ 1.9559,  1.9384,  1.9559,  ...,  1.9909,  1.9909,  1.9909],
            ...,
            [ 2.0259,  2.0259,  2.0259,  ...,  0.7654,  0.7304,  0.6954],
            [ 2.0259,  2.0259,  2.0259,  ...,  0.6779,  0.6429,  0.6254],
            [ 1.9909,  1.9909,  1.9909,  ...,  0.5553,  0.5553,  0.5553]],
  
           [[ 2.5529,  2.5529,  2.5703,  ...,  2.4831,  2.5006,  2.465

### Encode all tiles 3rd attempt

In [17]:
from torch.utils.data import Dataset, DataLoader
from openslide import OpenSlide
import torch
import timm
from tqdm import tqdm
import numpy as np

class TileGeneratorDataset(Dataset):
    """
    Do encoding for tiles

    Arguments:
    ----------

    transform : torchvision.transforms.Compose
        Transform to apply to each image
    """
    def __init__(self, wsi, tile_size, resolution, coords=None, transform=None):
        self.transform = transform
        self.wsi = wsi  # openslide.OpenSlide object
        self.coords = coords  # np.array of [x, y] coords at OpenSlide level 0
        self.tile_size = tile_size
        self.resolution = resolution
        self.level = wsi.get_level_from_resolution(resolution)

    def __len__(self):
        return len(self.coords)

    def __getitem__(self, idx):

        x, y = coords[idx]
        img = wsi.read_region((x, y), self.level, (tile_size, tile_size)).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return {'img': torch.from_numpy(np.array(img)),
                'coords': torch.from_numpy(np.array([x, y])).float()}

@torch.no_grad()
def run_inference_with_tile_encoder(tile_encoder: torch.nn.Module, tile_dl) -> dict:
    """
    Run inference with the tile encoder

    Arguments:
    ----------
    tile_dl: tile dataloader
    tile_encoder : torch.nn.Module
        Tile encoder model
    """
    tile_encoder = tile_encoder.cuda()
    # make the tile dataloader
    # tile_dl = DataLoader(TileEncodingDataset(image_paths, transform=load_tile_encoder_transforms()), batch_size=batch_size, shuffle=False)
    # run inference
    tile_encoder.eval()
    collated_outputs = {'tile_embeds': [], 'coords': []}
    batches = []
    with torch.cuda.amp.autocast(dtype=torch.float16):
        for batch in tqdm(tile_dl, desc='Running inference with tile encoder'):
            batches.append(batch)
            collated_outputs['tile_embeds'].append(tile_encoder(batch['img'].cuda()).detach().cpu())
            collated_outputs['coords'].append(batch['coords'])
    return {k: torch.cat(v) for k, v in collated_outputs.items()}
    


In [3]:
tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True)

  return self.fget.__get__(instance, owner)()


In [29]:
from cp_toolbox.deep_learning.torch.generators import TileGenerator
from cp_toolbox.image_processing.slide import Wsi
import h5py
import timm

from gigapath.pipeline import load_tile_encoder_transforms

### TileGenerator parameters
resolution = 20
batch_size = 20
tile_size = 256
target_size = 224  # target size of encoder, transform crops to that size in TileGenerator

filename = "7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65"
features_path = f"/main_dir/felipe/projects/CLAM/datasets/features/h5_files/{filename}.h5"
wsi_path = f"/main_dir/felipe/projects/CLAM/datasets/features/{filename}.tiff"

wsi = Wsi(wsi_path)

with h5py.File(features_path, "r") as f:
    coords = f["coords"][()]  # returns as a numpy array
coords_sample = coords[5000:6000]


tile_dl = DataLoader(
    TileGeneratorDataset(
        transform=load_tile_encoder_transforms(),
        wsi=wsi,
        coords=coords_sample,
        tile_size=tile_size,
        resolution=resolution
    ),
    batch_size=batch_size,
    shuffle=False
)

In [33]:
wsi.filename

'7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65'

In [30]:
output = run_inference_with_tile_encoder(tile_encoder=tile_encoder, tile_dl=tile_dl)

Running inference with tile encoder: 100%|████████████████████████████████████████████████████████████| 50/50 [00:16<00:00,  2.95it/s]


In [31]:
output

{'tile_embeds': tensor([[ 0.4993, -1.4950,  2.1918,  ..., -0.0545,  0.0081,  0.1599],
         [ 0.7708, -0.2690,  0.9814,  ..., -0.4922,  2.1371, -0.9408],
         [ 0.5142, -0.0161,  2.5200,  ..., -0.1084,  0.9910, -0.6875],
         ...,
         [-0.5244,  0.2533,  0.0040,  ..., -0.4060,  0.0287,  1.3288],
         [-0.4027, -1.1646,  0.0037,  ...,  1.3960,  0.2657,  0.0773],
         [-0.2534, -0.3035, -1.3161,  ..., -0.3749,  0.0645,  0.5636]]),
 'coords': tensor([[ 67584.,      0.],
         [ 68096.,      0.],
         [ 68608.,      0.],
         ...,
         [113664.,   2560.],
         [122880.,   2560.],
         [123392.,   2560.]])}

In [25]:
output

{'tile_embeds': tensor([[ 0.4941, -1.4962,  2.1843,  ..., -0.0500,  0.0146,  0.1759],
         [ 0.7880, -0.2803,  1.0191,  ..., -0.4930,  2.1281, -0.9108],
         [ 0.5081, -0.0171,  2.5408,  ..., -0.0973,  0.9698, -0.6833],
         ...,
         [-0.5157,  0.2826,  0.0232,  ..., -0.4018, -0.0191,  1.3132],
         [-0.4301, -1.1413,  0.0317,  ...,  1.3918,  0.2644,  0.0988],
         [-0.2446, -0.2747, -1.3174,  ..., -0.3849,  0.0658,  0.5618]]),
 'coords': tensor([[ 67584.,      0.],
         [ 68096.,      0.],
         [ 68608.,      0.],
         ...,
         [113664.,   2560.],
         [122880.,   2560.],
         [123392.,   2560.]])}

### Process slide with encoded tiles

In [11]:
import gigapath.slide_encoder

slide_encoder = gigapath.slide_encoder.create_model("hf_hub:prov-gigapath/prov-gigapath", "gigapath_slide_enc12l768d", 1536)


dilated_ratio:  [1, 2, 4, 8, 16]
segment_length:  [1024, 5792, 32768, 185363, 1048576]
Number of trainable LongNet parameters:  85148160
Global Pooling: False


slide_encoder.pth:   0%|          | 0.00/345M [00:00<?, ?B/s]

[92m Successfully Loaded Pretrained GigaPath model from hf_hub:prov-gigapath/prov-gigapath [00m


In [12]:
import h5py
with h5py.File("../tests/7b3dc0e9-cbe0-479c-b9e2-7cafc40e2b65.h5", "r") as f:
    coords = f["coords"][()]  # returns as a numpy array
    tile_embeds = f["tile_embeds"][()]  # returns as a numpy array


In [20]:
from gigapath.pipeline import run_inference_with_slide_encoder

output = run_inference_with_slide_encoder(
    tile_embeds=torch.tensor(tile_embeds),
    coords=torch.tensor(coords),
    slide_encoder_model=slide_encoder
)

In [22]:
output.keys()

dict_keys(['layer_0_embed', 'layer_1_embed', 'layer_2_embed', 'layer_3_embed', 'layer_4_embed', 'layer_5_embed', 'layer_6_embed', 'layer_7_embed', 'layer_8_embed', 'layer_9_embed', 'layer_10_embed', 'layer_11_embed', 'layer_12_embed', 'last_layer_embed'])