In [16]:
%load_ext autoreload
%autoreload 2
%load_ext yamlmagic

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The yamlmagic extension is already loaded. To reload it, use:
  %reload_ext yamlmagic


# MultiverSeg embedding sandbox

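Quick testbed for extracting a single embedding vector from the MultiverSeg encoder using global avg/max pooling (this run uses `pooling="gap"`), with CLIP and ViT encoders run on the same image for comparison.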

In [17]:
from pathlib import Path
import sys
import torch

# Make the project checkout and its two bundled sub-repositories importable.
repo_root = Path("/data/ddmg/mvseg-ordering/")
import_roots = [repo_root] + [repo_root / sub for sub in ("UniverSeg", "MultiverSeg")]
for path in import_roots:
    path_str = str(path)
    # Skip entries that are already present so re-running the cell does not
    # pile up duplicates in sys.path.
    if path_str in sys.path:
        continue
    sys.path.append(path_str)

from experiments.dataset.mega_medical_dataset import MegaMedicalDataset
from experiments.encoders.multiverseg_encoder import MultiverSegEncoder
from experiments.encoders.clip import CLIPEncoder
from experiments.encoders.vit import ViTEncoder


## Pick an encoder
The cells below instantiate all three encoders (MultiverSeg, CLIP, ViT) and run each of them on the same sample.


In [18]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MegaMedical selection. Change `dataset_target` to look at a different target;
# `dataset_size` is kept tiny so this stays a quick sanity check.
dataset_target = 12
dataset_split = "train"
dataset_size = 4

ds = MegaMedicalDataset(
    split=dataset_split,
    dataset_size=dataset_size,
    dataset_target=dataset_target,
)
# Displayed as the cell result: number of samples in the subset.
len(ds)

No updates to index
Filtered task_df: 1248
got task df: 1248


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["label_type"].fillna("soft", inplace=True)


target_datasets: 1248


4

In [19]:
# The first dataset item is an (image, mask) pair.
img, lbl = ds[0]
print("Image shape:", tuple(img.shape))
print("Label shape:", tuple(lbl.shape))

# Move both tensors to the compute device. No batch axis is added here:
# img_b / lbl_b keep the per-sample shape printed above.
img_b, lbl_b = (tensor.to(device) for tensor in (img, lbl))


Image shape: (1, 128, 128)
Label shape: (1, 128, 128)


In [21]:
# Build all three encoders on the selected device and put them in eval mode.

# MultiverSeg encoder with pooling="gap" (presumably global average pooling;
# confirm in the encoder implementation).
enc = MultiverSegEncoder(pooling="gap")
enc = enc.to(device).eval()

# CLIP encoder: ViT-B-32 with the "openai" pretrained tag.
clip_config = {"model_name": "ViT-B-32", "pretrained": "openai"}
clip = CLIPEncoder(**clip_config).to(device).eval()

# ViT encoder: vit_b_16 with pretrained weights enabled.
vit_config = {"model_name": "vit_b_16", "pretrained": True}
vit = ViTEncoder(**vit_config).to(device).eval()



In [22]:
# Sanity check: shape of the tensor that will be passed to the encoders.
print(img_b.shape)

torch.Size([1, 128, 128])


In [23]:
# Run the same image through each encoder in turn (CLIP, then ViT, then
# MultiverSeg) and report the embedding size plus its first few values.
#
# Each result is stored in `embeddings` under the encoder's name so that one
# run does not overwrite another. `emb` ends up holding the last embedding
# (MultiverSeg), exactly as when the three runs were written out by hand.
#
# NOTE(review): the earlier comments described every embedding as
# "global avg + max pooled, L2-normalized". That cannot be confirmed from this
# notebook (the MultiverSeg encoder is built with pooling="gap"), so check the
# encoder implementations before relying on it.
encoders = {"clip": clip, "vit": vit, "multiverseg": enc}
embeddings = {}

for encoder_name, encoder in encoders.items():
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        emb = encoder(img_b)
    embeddings[encoder_name] = emb

    print("Embedding shape:", tuple(emb.shape))
    print("First 8 dims:", emb[0, :8].cpu().numpy())




Embedding shape: (1, 512)
First 8 dims: [ 0.00735284 -0.01650549  0.00367393 -0.01733396 -0.0033046  -0.02681194
  0.00133051  0.01993955]
Embedding shape: (1, 768)
First 8 dims: [ 0.0122594   0.07449619  0.05001451 -0.09478112  0.02037749  0.00559178
 -0.01800939  0.02084701]
Embedding shape: (1, 256)
First 8 dims: [ 0.01669559 -0.04529885  0.0234932  -0.00304918 -0.04687631 -0.03542258
  0.01770178 -0.02281269]
