In [None]:
# Install required packages
# pip install openslide-python pillow numpy matplotlib

import os
os.add_dll_directory(r'C:\Users\curti\OpenSlide\bin')

import openslide
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): absolute, machine-specific path; consider a configurable data directory
filepath = r'C:\Users\curti\PycharmProjects\Crary_Lab\slides\41998.svs'

# Open WSI file
# The handle is bound at module level and is not closed in this cell
# (presumably so that later cells can keep reading from it).
slide = openslide.OpenSlide(filepath)

# Basic properties
# The level_* attributes are per-pyramid-level sequences; they are indexed by
# level when tiling.
print(f"Dimensions: {slide.dimensions}")
print(f"Level count: {slide.level_count}")
print(f"Level dimensions: {slide.level_dimensions}")
print(f"Downsample factors: {slide.level_downsamples}")

# Get a thumbnail for visualization
# (1000, 1000) is the size argument handed to get_thumbnail; presumably an
# upper bound on the thumbnail's width/height. TODO confirm against OpenSlide docs.
thumbnail = slide.get_thumbnail((1000, 1000))
plt.imshow(thumbnail)
plt.title("WSI Thumbnail")
plt.show()

In [None]:
def tile_wsi(slide, tile_size=224, level=0, overlap=0):
    """
    Tile a WSI into smaller patches.

    Walks the chosen pyramid level in a regular grid that starts at (0, 0)
    and advances by ``tile_size - overlap`` pixels, reading one
    ``tile_size`` x ``tile_size`` region per grid position.

    Args:
        slide: OpenSlide object (any object exposing ``level_dimensions``,
            ``level_downsamples`` and ``read_region`` works).
        tile_size: Size of each tile (pixels, measured at ``level``).
            Must be positive.
        level: Pyramid level to use (0 = highest resolution).
        overlap: Overlap between neighbouring tiles (pixels, measured at
            ``level``). Must be smaller than ``tile_size``.

    Returns:
        List of dicts in row-major order (top-to-bottom, left-to-right),
        one per tile, with keys:
            'image': the region returned by ``read_region``, converted to RGB
            'x', 'y': top-left corner of the tile in level-0 coordinates
            'level': the pyramid level the tile was read from

    Raises:
        ValueError: If ``tile_size`` is not positive, or if ``overlap`` is
            greater than or equal to ``tile_size`` (the grid would never
            advance).

    Note:
        All tiles are accumulated in memory. Grid positions near the right
        and bottom edges are not cropped, so those tiles can extend past the
        level's bounds; how the out-of-bounds pixels are filled is decided
        by ``slide.read_region``.
    """
    # Validate up front: a zero stride makes range() fail with an opaque
    # message, and a negative stride silently produces no tiles at all.
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")

    # Stride is the step between the top-left corners of adjacent tiles.
    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than tile_size ({tile_size})"
        )

    # Grid extent and scale factor for the requested pyramid level.
    width, height = slide.level_dimensions[level]
    downsample = slide.level_downsamples[level]

    tiles = []

    for y in range(0, height, stride):
        # read_region is given level-0 coordinates, so scale the level-local
        # grid position back up. The row coordinate is constant for the
        # whole inner loop, so compute it once per row.
        y_level0 = int(y * downsample)

        for x in range(0, width, stride):
            x_level0 = int(x * downsample)

            # Location is in level-0 space; size is in pixels at `level`.
            tile = slide.read_region(
                (x_level0, y_level0),
                level,
                (tile_size, tile_size)
            )

            # Store the tile with the metadata needed to locate it later.
            tiles.append({
                'image': tile.convert('RGB'),
                'x': x_level0,
                'y': y_level0,
                'level': level
            })

    return tiles

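""" Whole Slide Images are **massive** (often 100,000 × 100,000 pixels or larger). To handle this, they're stored as **image pyramids**: the same scan saved at several resolutions, so a reader can work at whichever level suits the task instead of always loading full resolution.

Example pyramid (the actual number of levels and the downsample factors vary from file to file; check `slide.level_dimensions` and `slide.level_downsamples`):

Level 0 (highest resolution):  100,000 × 100,000 pixels  (original scan)
Level 1:                        50,000 × 50,000 pixels   (2× downsampled)
Level 2:                        25,000 × 25,000 pixels   (4× downsampled)
Level 3:                        12,500 × 12,500 pixels   (8× downsampled)
"""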

In [None]:
# Install required packages
# pip install torch torchvision timm huggingface_hub python-dotenv

import torch
import timm
from huggingface_hub import login, hf_hub_download
from dotenv import load_dotenv

# Authenticate with the Hugging Face Hub. The token is read from the
# environment after load_dotenv() has run; a missing or empty token aborts
# the cell before any login or model download is attempted.
load_dotenv()
token = os.getenv("HUGGINGFACE_TOKEN")
if not token:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
login(token=token)

# Build the UNI model from the Hub and switch it to evaluation mode
# (nn.Module.eval() returns the module itself, so it can be chained).
model = timm.create_model(
    "hf-hub:mahmoodLab/uni",
    pretrained=True,
    init_values=1e-5,
    dynamic_img_size=True,
).eval()

# Run on the GPU when one is visible to torch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# Preprocessing pipeline applied to each tile before it is fed to the model:
# resize to 224, convert to a tensor, then normalise per channel.
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Extract embeddings
def extract_embeddings(tiles, model, transform, device):
    """Compute one embedding per tile.

    Args:
        tiles: Iterable of dicts, each providing 'image', 'x' and 'y'.
        model: Callable applied to a single-item batch tensor.
        transform: Callable turning a tile image into a tensor.
        device: Device the batch tensor is moved to before the forward pass.

    Returns:
        List of dicts in input order with keys 'embedding' (the model output
        moved to the CPU and converted to a NumPy array), 'x' and 'y'
        (copied from the corresponding tile).

    Note:
        Tiles are processed one at a time with gradient tracking disabled.
        Each input gets a leading batch axis of size 1 via ``unsqueeze(0)``;
        presumably the model keeps that axis in its output. TODO confirm.
    """
    def _embed(image):
        # Preprocess, add the batch axis, run the model, bring the result back.
        batch = transform(image).unsqueeze(0).to(device)
        return model(batch).cpu().numpy()

    with torch.no_grad():
        return [
            {
                'embedding': _embed(record['image']),
                'x': record['x'],
                'y': record['y'],
            }
            for record in tiles
        ]

In [None]:
from huggingface_hub import HfApi, login
# Test API access
# Queries the Hub for the model's metadata as a quick check that this
# account can see the UNI repository before attempting a full download.
api = HfApi()

# Check if you can access UNI
# NOTE(review): the repo id is spelled "MahmoodLab/UNI" here but
# "mahmoodLab/uni" in the timm.create_model call; confirm the Hub resolves
# both spellings to the same repository.
try:
    model_info = api.model_info("MahmoodLab/UNI")
    print("✓ Successfully accessed UNI model!")
    # These attribute reads sit inside the try block, so a failure on any of
    # them is also reported through the handler below.
    print(f"  Model ID: {model_info.modelId}")
    print(f"  Downloads: {model_info.downloads}")
    print(f"  Gated: {model_info.gated}")
    
# Deliberately broad: any failure is turned into a printed hint rather than a
# traceback, and the cell finishes without raising.
except Exception as e:
    print("✗ Cannot access UNI model")
    print(f"  Error: {e}")
    print("\nPlease:")
    print("1. Visit https://huggingface.co/MahmoodLab/UNI")
    print("2. Click 'Request access' if you haven't")
    print("3. Wait for approval email")