In [11]:
import os
import numpy as np
import openslide
from openslide.deepzoom import DeepZoomGenerator
from openslide import open_slide, ImageSlide
from PIL import Image
from matplotlib import pyplot as plt
from openslide import OpenSlide
import cv2
import numpy as np
from matplotlib import pyplot as plt
from openslide import OpenSlide

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import pyvips
from torchvision.transforms import ToPILImage

In [13]:
# --- 1. Load DINO-v2 Backbone ---

def get_dino_v2_backbone():
    # This returns a model directly, not a state_dict
    backbone = torch.hub.load("facebookresearch/dinov2", "dinov2_vitb14")  # ✅ this is already a model
    return backbone


class DINOv2SegmentationModel(nn.Module):
    def __init__(self, backbone, num_classes):
        super().__init__()
        self.backbone = backbone
        self.num_classes = num_classes
        self.decoder = nn.Sequential(
            nn.Conv2d(768, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, num_classes, kernel_size=1)
        )

    def forward(self, x):
        B, C, H, W = x.shape
        features = self.backbone.get_intermediate_layers(x, n=1)[0]  # (B, N, 768)
        feat_size = int(features.shape[1] ** 0.5)
        features = features.permute(0, 2, 1).reshape(B, 768, feat_size, feat_size)  # (B, 768, h, w)
        out = self.decoder(features)  # (B, num_classes, h, w)
        out = F.interpolate(out, size=(H, W), mode='bilinear', align_corners=False)  # upscale to 1022x1022
        return out
    
def make_prediction(image):
    transform = transforms.Compose([
        transforms.Resize((1022, 1022)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    #image = Image.open(img_path).convert("RGB")
    image = image.convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(device)  # (1, 3, 1022, 1022)
    image_np = np.array(image.convert("RGB").resize((1022, 1022)))
    with torch.no_grad():
        output = model(input_tensor)  # (1, num_classes, 1022, 1022)
        #print(output, output.shape)
        #pred_mask = output.argmax(dim=1).squeeze(0).cpu().numpy()  # (1022, 1022)
        probs = torch.softmax(output, dim=1)  # convert logits → probabilities
        conf, pred = torch.max(probs, dim=1)  # conf: confidence per pixel, pred: predicted class
        conf = conf.squeeze(0).cpu().numpy()  # (1022, 1022)
        pred_mask = pred.squeeze(0).cpu().numpy()  # (1022, 1022)
        threshold = 0.9  # choose based on your model's calibration
        high_conf_mask = np.where(conf >= threshold, pred_mask, 0)
        print(np.unique(high_conf_mask))
        
    mask_rescaled = cv2.resize(high_conf_mask, (512, 512), interpolation=cv2.INTER_NEAREST)
    return image_np, mask_rescaled


def is_background(tile, threshold=0.8):
    """
    Check if a tile is mostly background (white).
    Args:
        tile (PIL.Image): The tile image.
        threshold (float): Fraction of white pixels to consider as background (0.0–1.0).
    Returns:
        bool: True if background, False otherwise.
    """
    #img = np.array(tile.convert("L"))  # Convert to grayscale
    img = np.array(tile)
    white_pixels = np.sum(img > 240)   # Count near-white pixels
    total_pixels = img.size
    white_ratio = white_pixels / total_pixels
    return white_ratio > threshold

def get_bbox(row, col,pred_mask):
    class_ids = np.unique(pred_mask)
    class_ids = class_ids[class_ids != 0]
    pred_boxes = []
    for class_id in class_ids:
        # Create binary mask for this class
        binary = (pred_mask == class_id).astype(np.uint8)
        # Find contours for that class
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        sorted_with_area = sorted([(c, cv2.contourArea(c)) for c in contours], key=lambda x: x[1],reverse=True)
        for contour, area in sorted_with_area:
            if area>2000:
                #print(area)
                x, y, w, h = cv2.boundingRect(contour)
                pred_boxes.append([row, col, x, y, w, h, class_id])
    return pred_boxes

In [9]:
def crop_tile(slide, out_dir, tile, ext='png'):
    # Generate the output filename based on the tile coordinates and extension
    out_file = out_dir + '/'+'_'.join(map(lambda _: '_'.join(_), zip('xy', map(str, tile[:2])))) + f'.{ext}'
    # Crop the slide using the tile coordinates and convert to numpy array
    crop = slide.crop(tile[1]*512, tile[0]*512, 512, 512)
    crop = np.ndarray(
            buffer=crop.write_to_memory(), dtype=np.uint8,
            shape=(crop.height, crop.width, crop.bands))[..., :3]  # Keep only the RGB channels
    bg_flag = is_background(crop, threshold=0.6)
    return bg_flag

In [10]:
path = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/ALS/dino_v2_segmentation_oct10.pth"
path = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/ALS/dino_v2_segmentation_oct17.pth"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
backbone = get_dino_v2_backbone()
model = DINOv2SegmentationModel(backbone, num_classes=3)  # Update `num_classes` as needed
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)
model.eval()  # 🔍 Important for inference

Using cache found in /home/mahirwar/.cache/torch/hub/facebookresearch_dinov2_main
  model.load_state_dict(torch.load(path, map_location=device))


DINOv2SegmentationModel(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False

In [5]:
fname = '/gladstone/finkbeiner/steve/work/data/ALS/fals-lumbar/NEUKM699KKH_Lumbar_pTDP_43.svs'

output_dir = '/gladstone/finkbeiner/steve/work/data/npsad_data/monika/ALS/tiles-npy'

size = 512

slide = pyvips.Image.new_from_file(fname, level=0)
im = slide.numpy()[..., :3]     
h, w = im.shape[0],  im.shape[1]

t = [(y, x) for x in range((w // size)-1) for y in range((h// size)-1)]

for_sel = [crop_tile(slide, output_dir, t[i]) for i in range(len(t))]

false_indices = [i for i in range(len(for_sel)) if for_sel[i]==False]

print(len(false_indices))




5971


In [16]:
pred_boxes_all = []

for idx in false_indices:
    val = t[idx]
    crop = slide.crop(val[1]*512, val[0]*512,512,512)
    crop = np.ndarray(
            buffer=crop.write_to_memory(), dtype=np.uint8,
            shape=(crop.height, crop.width, crop.bands))[..., :3]  # Keep only the RGB channels
    crp = Image.fromarray(crop)
    img, pred_mask = make_prediction(crp)
    pred_boxes = get_bbox(val[1], val[0], pred_mask)
    pred_boxes_all.extend(pred_boxes)


[0]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0]
[0 2]
[0 2]
[0 1 2]
[0 2]
[0 2]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0]
[0]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0]
[0]
[0 2]
[0]
[0 1]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0 2]
[0]
[0]
[0]
[0 2]
[0 2]
[0]
[0 2]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0 2]
[0

In [17]:
    
df = pd.DataFrame(pred_boxes_all, columns=["row","col","x1","y1","x2","y2","class"])
df.to_csv(os.path.join(output_dir, "NEUKM699KKH_Lumbar_pTDP_43.csv"))