Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions .github/workflows/pr-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,7 @@ on:
workflow_dispatch:

jobs:
regression-guards:
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read

steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Run regression guard tests
run: |
set -euo pipefail
python -m unittest discover -s test -p 'test_regression_bugfixes.py'

docker-test:
needs: regression-guards
runs-on: ubuntu-latest
timeout-minutes: 60
permissions:
Expand Down
2 changes: 1 addition & 1 deletion docs/benchmarking.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
The script samples a balanced subset of your manifest, runs untimed warmups plus repeated measured trials, tunes only:

- `model.batch_size`
- `speed.num_workers_embedding`
- `speed.num_dataloader_workers`

It keeps the rest of each model config fixed, disables previews / resume / Weights & Biases, and writes:

Expand Down
2 changes: 1 addition & 1 deletion docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ Common overrides:

- `output_dir=/path/to/output`
- `speed.num_gpus=4`
- `speed.num_workers_embedding=8`
- `speed.num_dataloader_workers=8`
- `tiling.preview.save=true`
- `model.name=...`
- `model.output_variant=...`
Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,10 @@ classifiers = [
dependencies = [
"hs2p>=3.1.2",
"omegaconf",
"h5py",
"matplotlib",
"numpy<2",
"pandas",
"pillow",
"rich",
"tqdm",
"torch",
"torchvision",
"transformers",
Expand Down
3 changes: 2 additions & 1 deletion slide2vec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, PreprocessingConfig, RunResult
from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact
from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact


__version__ = "3.2.1"
Expand All @@ -12,6 +12,7 @@
"RunResult",
"EmbeddedSlide",
"SlideEmbeddingArtifact",
"HierarchicalEmbeddingArtifact",
"TileEmbeddingArtifact",
"__version__",
]
113 changes: 91 additions & 22 deletions slide2vec/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

import os
from dataclasses import dataclass, field, replace
from contextlib import contextmanager
from pathlib import Path
Expand All @@ -8,7 +7,11 @@
import torch
from hs2p import SlideSpec

from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact
from slide2vec.artifacts import (
HierarchicalEmbeddingArtifact,
SlideEmbeddingArtifact,
TileEmbeddingArtifact,
)
from slide2vec.encoders.registry import (
encoder_registry,
resolve_preprocessing_defaults,
Expand All @@ -17,6 +20,7 @@
from slide2vec.model_settings import canonicalize_model_name, normalize_precision_name
from slide2vec.progress import emit_progress
from slide2vec.runtime_types import LoadedModel
from slide2vec.utils.utils import slurm_cpu_limit

PathLike = str | Path

Expand All @@ -38,6 +42,8 @@ class PreprocessingConfig:
backend: str = "auto"
target_spacing_um: float | None = None
target_tile_size_px: int | None = None
target_region_size_px: int | None = None
region_tile_multiple: int | None = None
tolerance: float = 0.05
overlap: float = 0.0
tissue_threshold: float = 0.01
Expand Down Expand Up @@ -69,6 +75,16 @@ def from_config(cls, cfg: Any) -> "PreprocessingConfig":
backend=tiling.backend,
target_spacing_um=float(tiling.params.target_spacing_um),
target_tile_size_px=int(tiling.params.target_tile_size_px),
target_region_size_px=(
int(v)
if (v := getattr(tiling.params, "target_region_size_px", None)) is not None
else None
),
region_tile_multiple=(
int(v)
if (v := getattr(tiling.params, "region_tile_multiple", None)) is not None
else None
),
tolerance=float(tiling.params.tolerance),
overlap=float(tiling.params.overlap),
tissue_threshold=float(tiling.params.tissue_threshold),
Expand Down Expand Up @@ -140,17 +156,10 @@ def __post_init__(self) -> None:
raise ValueError("ExecutionOptions.num_gpus must be at least 1")
if self.prefetch_factor < 1:
raise ValueError("ExecutionOptions.prefetch_factor must be at least 1")
slurm_cpu_limit = None
for env_name in ("SLURM_CPUS_PER_TASK", "SLURM_CPUS_ON_NODE", "SLURM_JOB_CPUS_PER_NODE"):
if env_name not in os.environ:
continue
value = os.environ[env_name]
if value and value.strip().isdigit() and int(value.strip()) > 0:
slurm_cpu_limit = int(value.strip())
break
if slurm_cpu_limit is not None:
object.__setattr__(self, "num_workers", min(self.num_workers, slurm_cpu_limit))
object.__setattr__(self, "num_preprocessing_workers", min(self.num_preprocessing_workers, slurm_cpu_limit))
limit = slurm_cpu_limit()
if limit is not None:
object.__setattr__(self, "num_workers", min(self.num_workers, limit))
object.__setattr__(self, "num_preprocessing_workers", min(self.num_preprocessing_workers, limit))

def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
if output_dir is None:
Expand All @@ -161,6 +170,7 @@ def with_output_dir(self, output_dir: PathLike | None) -> "ExecutionOptions":
@dataclass(frozen=True, kw_only=True)
class RunResult:
tile_artifacts: list[TileEmbeddingArtifact]
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact]
slide_artifacts: list[SlideEmbeddingArtifact]
process_list_path: Path | None = None

Expand Down Expand Up @@ -228,7 +238,7 @@ def embed_tiles(
*,
preprocessing: PreprocessingConfig | None = None,
execution: ExecutionOptions | None = None,
) -> list[TileEmbeddingArtifact]:
) -> list[TileEmbeddingArtifact] | list[HierarchicalEmbeddingArtifact]:
from slide2vec.inference import embed_tiles

resolved = _coerce_execution_options(execution, model=self)
Expand Down Expand Up @@ -353,6 +363,25 @@ def run(
execution=self.execution,
)

def run_with_coordinates(
self,
coordinates_dir: str | Path,
*,
slides: SlideSequence | None = None,
) -> RunResult:
from slide2vec.inference import run_pipeline_with_coordinates

with _auto_progress_reporting(output_dir=self.execution.output_dir):
resolved_preprocessing = _resolve_direct_api_preprocessing(self.model, self.preprocessing)
_validate_model_config(self.model, resolved_preprocessing, self.execution)
return run_pipeline_with_coordinates(
self.model,
coordinates_dir=coordinates_dir,
slides=slides,
preprocessing=resolved_preprocessing,
execution=self.execution,
)


def _coerce_execution_options(
options: ExecutionOptions | None,
Expand Down Expand Up @@ -398,10 +427,12 @@ def ensure_defaults() -> tuple[int, float]:

if preprocessing is None:
target_tile_size_px, target_spacing_um = ensure_defaults()
return PreprocessingConfig(
backend="auto",
target_spacing_um=target_spacing_um,
target_tile_size_px=target_tile_size_px,
return _resolve_hierarchical_preprocessing(
PreprocessingConfig(
backend="auto",
target_spacing_um=target_spacing_um,
target_tile_size_px=target_tile_size_px,
)
)

target_spacing_um = preprocessing.target_spacing_um
Expand All @@ -412,10 +443,12 @@ def ensure_defaults() -> tuple[int, float]:
target_spacing_um = default_spacing_um
if target_tile_size_px is None:
target_tile_size_px = default_tile_size_px
return replace(
preprocessing,
target_spacing_um=target_spacing_um,
target_tile_size_px=target_tile_size_px,
return _resolve_hierarchical_preprocessing(
replace(
preprocessing,
target_spacing_um=target_spacing_um,
target_tile_size_px=target_tile_size_px,
)
)


Expand All @@ -438,6 +471,10 @@ def _validate_model_config(
name = model.name
if name not in encoder_registry:
return
if preprocessing.region_tile_multiple is not None or preprocessing.target_region_size_px is not None:
info = encoder_registry.info(name)
if info["level"] != "tile":
raise ValueError("Hierarchical preprocessing is only supported for tile encoders")
# Skip precision validation for CPU execution (fp32 is always valid on CPU).
on_cpu = model._requested_device == "cpu"
precision = None if on_cpu or execution is None else execution.precision
Expand All @@ -451,6 +488,38 @@ def _validate_model_config(
)


def _resolve_hierarchical_preprocessing(preprocessing: PreprocessingConfig) -> PreprocessingConfig:
multiple = preprocessing.region_tile_multiple
target_region_size_px = preprocessing.target_region_size_px
if multiple is not None:
multiple = int(multiple)
if multiple < 2:
raise ValueError("region_tile_multiple must be at least 2")
if multiple is None and target_region_size_px is None:
return preprocessing
if preprocessing.target_tile_size_px is None:
raise ValueError(
"target_tile_size_px must be resolved before deriving hierarchical region geometry"
)
if target_region_size_px is None:
target_region_size_px = int(preprocessing.target_tile_size_px) * int(multiple)
elif multiple is None:
if int(target_region_size_px) % int(preprocessing.target_tile_size_px) != 0:
raise ValueError(
"target_region_size_px must be an exact multiple of target_tile_size_px"
)
multiple = int(target_region_size_px) // int(preprocessing.target_tile_size_px)
elif int(target_region_size_px) != int(preprocessing.target_tile_size_px) * int(multiple):
raise ValueError(
"target_region_size_px must match target_tile_size_px * region_tile_multiple"
)
return replace(
preprocessing,
target_region_size_px=int(target_region_size_px),
region_tile_multiple=int(multiple),
)


@contextmanager
def _auto_progress_reporting(*, output_dir: PathLike | None):
from slide2vec.progress import (
Expand Down
Loading
Loading