In [1]:
# !pip install 'zarr<3'
# !pip install timm
# !pip install openslide-python tiffslide
# !pip install cupy-cuda12x  # for CUDA 12.x

In [1]:
# ALWAYS RUN THIS FIRST!
import os
import sys
from pathlib import Path

NOTEBOOK_DIR = Path("/rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest")
os.chdir(NOTEBOOK_DIR)
sys.path.insert(0, str(NOTEBOOK_DIR))

print(f"‚úÖ Working directory: {os.getcwd()}")


‚úÖ Working directory: /rsrch9/home/plm/idso_fa1_pathology/codes/yshokrollahi/vitamin-p-latest


## WSI Flex

In [2]:
"""
Example: Seamless VitaminPSyn Inference
User just provides H&E slide ‚Üí GAN generates synthetic MIF in background ‚Üí Segmentation results
"""

import torch
from vitaminp import VitaminPSyn
from vitaminp.inference import ChannelConfig, WSIPredictor

# ========================================================================
# EXAMPLE 1: VitaminPSyn - Synthetic Nuclei Segmentation
# ========================================================================
print("="*80)
print("EXAMPLE 1: VitaminPSyn - Synthetic Nuclei Detection")
print("="*80)

# Setup model
device = 'cuda'
model = VitaminPSyn(model_size='base').to(device)
model.load_state_dict(torch.load("checkpoints/vitamin_p_syn_base_fold12_best.pth", map_location=device))
model.eval()



  from .autonotebook import tqdm as notebook_tqdm


EXAMPLE 1: VitaminPSyn - Synthetic Nuclei Detection
Building H&E encoder with DINOv2-base
Building Synthetic MIF encoder with DINOv2-base
Building shared encoder with DINOv2-base
‚úì VitaminPSyn initialized with base backbone
  Embed dim: 768 | Decoder dims: [768, 384, 192, 96]


VitaminPSyn(
  (he_backbone): DINOv2Backbone(
    (dinov2): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (proj): Linear(in_features=768, out_features=768, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): LayerScale()
          (drop_path1): Identity()
          (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(


In [12]:
# Create predictor with GAN checkpoint
predictor = WSIPredictor(
    model=model,
    device='cuda',
    patch_size=512,
    overlap=64,
    target_mpp=0.5013,
    magnification=20,
    gan_checkpoint_path="checkpoints/pix2pix_he_to_mif_final.pth"  # ‚Üê GAN for synthetic MIF
)

# Run inference - just provide H&E slide!
# The GAN will automatically generate synthetic MIF in the background
results = predictor.predict(
    wsi_path='test_images/CRC31-he_chunk_14.png',  # ‚Üê Only H&E needed!
    output_dir='results_syn_nuclei',
    branch='syn_nuclei',  # ‚Üê Automatically uses GAN + MIF predictions
    filter_tissue=True,
    tissue_threshold=0.05,
    clean_overlaps=True,
    save_geojson=True,
    detection_threshold=0.5,
    min_area_um=5.0,
)

print(f"‚úÖ Found {results['num_detections']} nuclei in {results['processing_time']:.2f}s")
print(f"   Output saved to: {results['output_dir']}")




2026-01-19 19:04:45 - WSIPredictor - INFO - üé® Loading GAN generator from checkpoints/pix2pix_he_to_mif_final.pth
2026-01-19 19:04:46 - WSIPredictor - INFO -    ‚úì GAN generator loaded successfully
2026-01-19 19:04:46 - WSIPredictor - INFO - WSIPredictor initialized:
2026-01-19 19:04:46 - WSIPredictor - INFO -   Device: cuda
2026-01-19 19:04:46 - WSIPredictor - INFO -   Model type: VitaminPSyn (H&E + Synthetic MIF)
2026-01-19 19:04:46 - WSIPredictor - INFO -   Patch size: 512
2026-01-19 19:04:46 - WSIPredictor - INFO -   Overlap: 64
2026-01-19 19:04:46 - WSIPredictor - INFO -   Magnification: 40
2026-01-19 19:04:46 - WSIPredictor - INFO -    ‚ö† No MPP in metadata, using default: 0.2630 Œºm/px
2026-01-19 19:04:46 - WSIPredictor - INFO - üîç Resolution matching:
2026-01-19 19:04:46 - WSIPredictor - INFO -    WSI MPP: 0.2630 Œºm/px
2026-01-19 19:04:46 - WSIPredictor - INFO -    Model training MPP: 0.2630 Œºm/px
2026-01-19 19:04:46 - WSIPredictor - INFO -    Scale factor: 1.00x
2026-0

   Virtual upscaled size: 512x512 (from 512x512)
   Scanning 1x1 tile grid...
   Tissue dilation: 1 ‚Üí 1 tiles (+0 boundary tiles)


Processing tiles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.79it/s]
2026-01-19 19:04:46 - WSIPredictor - INFO -    ‚úì Extracted 97 instances from tiles (before cleaning)
2026-01-19 19:04:46 - WSIPredictor - INFO - üßπ Cleaning overlapping instances at tile boundaries...
2026-01-19 19:04:46 - WSIPredictor - INFO -    Found 32 edge cells to check for overlaps
2026-01-19 19:04:46 - WSIPredictor - INFO - Starting overlap cleaning with 97 detections
2026-01-19 19:04:46 - WSIPredictor - INFO - Split detections: 0 mid, 97 edge
2026-01-19 19:04:46 - WSIPredictor - INFO - Iteration 1: Found 0 overlaps, keeping 97/97 detections
2026-01-19 19:04:46 - WSIPredictor - INFO - No more overlaps found, stopping early
2026-01-19 19:04:46 - WSIPredictor - INFO - Cleaning complete: 97 -> 97 (0 removed)
2026-01-19 19:04:46 - WSIPredictor - INFO -    ‚úì After cleaning: 97 instances
2026-01-19 19:04:46 - WSIPredictor - INFO - üíæ Saving results to results_syn_nuclei...
2026-01-19 19:04:46 

‚úÖ Found 97 nuclei in 0.29s
   Output saved to: results_syn_nuclei


In [11]:
# ========================================================================
# EXAMPLE 2: VitaminPSyn - Synthetic Cell Segmentation
# ========================================================================
print("\n" + "="*80)
print("EXAMPLE 2: VitaminPSyn - Synthetic Cell Detection")
print("="*80)

results = predictor.predict(
    wsi_path='test_images/ovarian-he_chunk_92.png',
    output_dir='results_syn_cell',
    branch='syn_cell',  # ‚Üê Uses synthetic MIF for cell segmentation
    filter_tissue=True,
    clean_overlaps=True,
    save_geojson=True,
    detection_threshold=0.5,
    min_area_um=20.0,  # Larger threshold for cells
)

print(f"‚úÖ Found {results['num_detections']} cells in {results['processing_time']:.2f}s")


2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚ö† No MPP in metadata, using default: 0.2630 Œºm/px
2026-01-19 19:00:08 - WSIPredictor - INFO - üîç Resolution matching:
2026-01-19 19:00:08 - WSIPredictor - INFO -    WSI MPP: 0.2630 Œºm/px
2026-01-19 19:00:08 - WSIPredictor - INFO -    Model training MPP: 0.2630 Œºm/px
2026-01-19 19:00:08 - WSIPredictor - INFO -    Scale factor: 1.00x
2026-01-19 19:00:08 - WSIPredictor - INFO -    Min area filter: 20.0 Œºm¬≤ = 289 pixels¬≤
2026-01-19 19:00:08 - WSIPredictor - INFO - üé® Opening H&E WSI for synthetic MIF generation: test_images/ovarian-he_chunk_92.png
2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚úì Size: 512x512 pixels
2026-01-19 19:00:08 - WSIPredictor - INFO - üìê Extracting tile positions...
2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚úì Created 1 tiles (1x1 grid)
2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚úì Tissue tiles: 1/1 (100.0%)
2026-01-19 19:00:08 - WSIPredictor - INFO - üß† Running predictions and extracting i


EXAMPLE 2: VitaminPSyn - Synthetic Cell Detection
   Virtual upscaled size: 512x512 (from 512x512)
   Scanning 1x1 tile grid...
   Tissue dilation: 1 ‚Üí 1 tiles (+0 boundary tiles)


Processing tiles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  5.37it/s]
2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚úì Extracted 90 instances from tiles (before cleaning)
2026-01-19 19:00:08 - WSIPredictor - INFO - üßπ Cleaning overlapping instances at tile boundaries...
2026-01-19 19:00:08 - WSIPredictor - INFO -    Found 36 edge cells to check for overlaps
2026-01-19 19:00:08 - WSIPredictor - INFO - Starting overlap cleaning with 90 detections
2026-01-19 19:00:08 - WSIPredictor - INFO - Split detections: 0 mid, 90 edge
2026-01-19 19:00:08 - WSIPredictor - INFO - Iteration 1: Found 0 overlaps, keeping 90/90 detections
2026-01-19 19:00:08 - WSIPredictor - INFO - No more overlaps found, stopping early
2026-01-19 19:00:08 - WSIPredictor - INFO - Cleaning complete: 90 -> 90 (0 removed)
2026-01-19 19:00:08 - WSIPredictor - INFO -    ‚úì After cleaning: 90 instances
2026-01-19 19:00:08 - WSIPredictor - INFO - üíæ Saving results to results_syn_cell...
2026-01-19 19:00:08 - 

‚úÖ Found 90 cells in 0.35s


In [14]:
# Create predictor with GAN checkpoint
predictor = WSIPredictor(
    model=model,
    device='cuda',
    patch_size=512,
    overlap=64,
    target_mpp=0.5013,
    magnification=20,
    gan_checkpoint_path="checkpoints/pix2pix_he_to_mif_final.pth"  # ‚Üê GAN for synthetic MIF
)
# ========================================================================
# EXAMPLE 3: Process Multiple Branches at Once
# ========================================================================
print("\n" + "="*80)
print("EXAMPLE 3: Process Multiple Syn Branches")
print("="*80)

results_multi = predictor.predict(
    wsi_path='test_images/MS004_HE_center_5000x5000.png',
    output_dir='results_syn_multi',
    branches=['syn_nuclei', 'syn_cell'],  # ‚Üê Both branches!
    filter_tissue=True,
    clean_overlaps=True,
    save_geojson=True,
)

for branch_name, branch_results in results_multi.items():
    print(f"\n{branch_name}:")
    print(f"  Detections: {branch_results['num_detections']}")
    print(f"  Time: {branch_results['processing_time']:.2f}s")
    print(f"  Output: {branch_results['output_dir']}")



2026-01-19 19:07:04 - WSIPredictor - INFO - üé® Loading GAN generator from checkpoints/pix2pix_he_to_mif_final.pth
2026-01-19 19:07:04 - WSIPredictor - INFO -    ‚úì GAN generator loaded successfully
2026-01-19 19:07:04 - WSIPredictor - INFO - WSIPredictor initialized:
2026-01-19 19:07:04 - WSIPredictor - INFO -   Device: cuda
2026-01-19 19:07:04 - WSIPredictor - INFO -   Model type: VitaminPSyn (H&E + Synthetic MIF)
2026-01-19 19:07:04 - WSIPredictor - INFO -   Patch size: 512
2026-01-19 19:07:04 - WSIPredictor - INFO -   Overlap: 64
2026-01-19 19:07:04 - WSIPredictor - INFO -   Magnification: 20
2026-01-19 19:07:04 - WSIPredictor - INFO - 
2026-01-19 19:07:04 - WSIPredictor - INFO - Processing branch: syn_nuclei



EXAMPLE 3: Process Multiple Syn Branches


2026-01-19 19:07:05 - WSIPredictor - INFO -    ‚ö† No MPP in metadata, using default: 0.5013 Œºm/px
2026-01-19 19:07:05 - WSIPredictor - INFO - üîç Resolution matching:
2026-01-19 19:07:05 - WSIPredictor - INFO -    WSI MPP: 0.5013 Œºm/px
2026-01-19 19:07:05 - WSIPredictor - INFO -    Model training MPP: 0.2630 Œºm/px
2026-01-19 19:07:05 - WSIPredictor - INFO -    Scale factor: 1.91x
2026-01-19 19:07:05 - WSIPredictor - INFO -    Min area filter: 3.0 Œºm¬≤ = 12 pixels¬≤
2026-01-19 19:07:05 - WSIPredictor - INFO - üé® Opening H&E WSI for synthetic MIF generation: test_images/MS004_HE_center_5000x5000.png
2026-01-19 19:07:06 - WSIPredictor - INFO -    ‚úì Size: 5000x5000 pixels
2026-01-19 19:07:06 - WSIPredictor - INFO - üìê Extracting tile positions...


   Virtual upscaled size: 9530x9530 (from 5000x5000)
   Scanning 22x22 tile grid...


2026-01-19 19:07:08 - WSIPredictor - INFO -    ‚úì Created 484 tiles (22x22 grid)
2026-01-19 19:07:08 - WSIPredictor - INFO -    ‚úì Tissue tiles: 484/484 (100.0%)
2026-01-19 19:07:08 - WSIPredictor - INFO - üß† Running predictions and extracting instances on syn_nuclei...


   Tissue dilation: 467 ‚Üí 484 tiles (+17 boundary tiles)


Processing tiles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 484/484 [01:15<00:00,  6.43it/s]
2026-01-19 19:08:24 - WSIPredictor - INFO -    ‚úì Extracted 50120 instances from tiles (before cleaning)
2026-01-19 19:08:24 - WSIPredictor - INFO - üßπ Cleaning overlapping instances at tile boundaries...
2026-01-19 19:08:24 - WSIPredictor - INFO -    Found 14523 edge cells to check for overlaps
2026-01-19 19:08:26 - WSIPredictor - INFO - Starting overlap cleaning with 50120 detections
2026-01-19 19:08:26 - WSIPredictor - INFO - Split detections: 0 mid, 50120 edge
2026-01-19 19:08:32 - WSIPredictor - INFO - Iteration 1: Found 12633 overlaps, keeping 37487/50120 detections
2026-01-19 19:08:35 - WSIPredictor - INFO - Iteration 2: Found 651 overlaps, keeping 36836/37487 detections
2026-01-19 19:08:39 - WSIPredictor - INFO - Iteration 3: Found 40 overlaps, keeping 36796/36836 detections
2026-01-19 19:08:42 - WSIPredictor - INFO - Iteration 4: Found 4 overlaps, keeping 36792/36796 detections
2026-01-1

   Virtual upscaled size: 9530x9530 (from 5000x5000)
   Scanning 22x22 tile grid...


2026-01-19 19:09:07 - WSIPredictor - INFO -    ‚úì Created 484 tiles (22x22 grid)
2026-01-19 19:09:07 - WSIPredictor - INFO -    ‚úì Tissue tiles: 484/484 (100.0%)
2026-01-19 19:09:07 - WSIPredictor - INFO - üß† Running predictions and extracting instances on syn_cell...


   Tissue dilation: 467 ‚Üí 484 tiles (+17 boundary tiles)


Processing tiles: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 484/484 [01:25<00:00,  5.66it/s]
2026-01-19 19:10:32 - WSIPredictor - INFO -    ‚úì Extracted 57314 instances from tiles (before cleaning)
2026-01-19 19:10:33 - WSIPredictor - INFO - üßπ Cleaning overlapping instances at tile boundaries...
2026-01-19 19:10:33 - WSIPredictor - INFO -    Found 17255 edge cells to check for overlaps
2026-01-19 19:10:36 - WSIPredictor - INFO - Starting overlap cleaning with 57314 detections
2026-01-19 19:10:36 - WSIPredictor - INFO - Split detections: 0 mid, 57314 edge
2026-01-19 19:10:45 - WSIPredictor - INFO - Iteration 1: Found 15669 overlaps, keeping 41645/57314 detections
2026-01-19 19:10:52 - WSIPredictor - INFO - Iteration 2: Found 1074 overlaps, keeping 40571/41645 detections
2026-01-19 19:10:58 - WSIPredictor - INFO - Iteration 3: Found 127 overlaps, keeping 40444/40571 detections
2026-01-19 19:11:04 - WSIPredictor - INFO - Iteration 4: Found 22 overlaps, keeping 40422/40444 detections
2026-0


syn_nuclei:
  Detections: 36792
  Time: 118.10s
  Output: results_syn_multi/syn_nuclei

syn_cell:
  Detections: 40409
  Time: 167.60s
  Output: results_syn_multi/syn_cell


In [None]:
# ========================================================================
# EXAMPLE 4: VitaminPFlex - MIF Only (for comparison)
# ========================================================================
print("\n" + "="*80)
print("EXAMPLE 4: VitaminPFlex - Real MIF (comparison)")
print("="*80)

from vitaminp import VitaminPFlex

# Setup Flex model
model_flex = VitaminPFlex(model_size='large').to(device)
model_flex.load_state_dict(torch.load("checkpoints/vitamin_p_flex_large_fold2_best.pth", map_location=device))
model_flex.eval()

# Create channel config
config = ChannelConfig(
    nuclear_channel=0,
    membrane_channel=[1, 2],
    membrane_combination='max',
    channel_names={0: 'SYTO13', 1: 'Cy3', 2: 'TexasRed'}
)

# Create predictor (no GAN needed for Flex)
predictor_flex = WSIPredictor(
    model=model_flex,
    device='cuda',
    patch_size=512,
    overlap=64,
    target_mpp=0.263,
    magnification=20,
    mif_channel_config=config
)

# Run MIF inference
results_mif = predictor_flex.predict(
    wsi_path='test_images/ovarian-he_chunk_92.png',
    output_dir='results_flex_mif',
    branch='mif_nuclei',
    filter_tissue=False,
    clean_overlaps=True,
    save_geojson=True,
    detection_threshold=0.5,
    min_area_um=5.0,
)

print(f"‚úÖ Found {results_mif['num_detections']} nuclei in {results_mif['processing_time']:.2f}s")

# ========================================================================
# COMPARISON: Syn vs Real MIF
# ========================================================================
print("\n" + "="*80)
print("COMPARISON SUMMARY")
print("="*80)
print(f"Synthetic MIF (from H&E): {results['num_detections']} nuclei")
print(f"Real MIF:                 {results_mif['num_detections']} nuclei")
print("="*80)