Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,33 +74,33 @@ requires-python = ">=3.11, <3.14"

dependencies = [
# From Template
"fastapi[standard,all]>=0.121.1,<1",
"fastapi[standard,all]>=0.121.3,<1",
"humanize>=4.14.0,<5",
"nicegui[native]>=3.1.0,<3.2.0", # Regression in 3.2.0
"nicegui[native]>=3.3.1,<4", # Regression in 3.2.0
"packaging>=25.0,<26",
"platformdirs>=4.5.0,<5",
"psutil>=7.1.3,<8",
"pydantic-settings>=2.12.0,<3",
"pywin32>=310,<311 ; sys_platform == 'win32'",
"pyyaml>=6.0.3,<7",
"sentry-sdk>=2.44.0,<3",
"sentry-sdk>=2.45.0,<3",
"typer>=0.20.0,<1",
"uptime>=3.0.1,<4",
# Custom
"aiopath>=0.6.11,<1",
"boto3>=1.40.61,<2",
"boto3>=1.41.0,<2",
"certifi>=2025.11.12,<2026",
"defusedxml>=0.7.1",
"dicom-validator>=0.7.3,<1",
"dicomweb-client[gcp]>=0.59.3,<1",
"duckdb>=0.10.0,<=1.4.1",
"fastparquet>=2024.11.0,<2025",
"google-cloud-storage>=3.5.0,<4",
"google-cloud-storage>=3.6.0,<4",
"google-crc32c>=1.7.1,<2",
"highdicom>=0.26.1,<1",
"html-sanitizer>=2.6.0,<3",
"httpx>=0.28.1,<1",
"idc-index-data==22.1.2",
"idc-index-data==22.1.5",
"ijson>=3.4.0.post0,<4",
"jsf>=0.11.2,<1",
"jsonschema[format-nongpl]>=4.25.1,<5",
Expand Down Expand Up @@ -131,7 +131,7 @@ jupyter = ["jupyter>=1.1.1,<2"]
marimo = [
"cloudpathlib>=0.23.0,<1",
"ipython>=9.7.0,<10",
"marimo>=0.17.7,<1",
"marimo>=0.17.8,<1",
"matplotlib>=3.10.7,<4",
"shapely>=2.1.0,<3",
]
Expand Down Expand Up @@ -165,7 +165,7 @@ dev = [
"pytest-timeout>=2.4.0,<3",
"pytest-watcher>=0.4.3,<1",
"pytest-xdist[psutil]>=3.8.0,<4",
"ruff>=0.14.4,<1",
"ruff>=0.14.5,<1",
"scalene>=1.5.55,<2",
"sphinx>=8.2.3,<9",
"sphinx-autobuild>=2025.8.25,<2026",
Expand Down
54 changes: 53 additions & 1 deletion src/aignostics/application/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,22 @@ def run_execute( # noqa: PLR0913, PLR0917
validate_only: Annotated[
bool, typer.Option(help="If True, cancel the run post validation, before analysis.")
] = False,
gpu_type: Annotated[
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enhancement Suggestion:

Consider using enum types directly for validation instead of strings:

from aignostics.platform._sdk_metadata import GPUType, ProvisioningMode

gpu_type: Annotated[
    GPUType,  # Use enum directly
    typer.Option(help="GPU type to use for processing."),
] = GPUType.A100

Benefits:

  • ✅ Type safety at runtime
  • ✅ IDE autocomplete support
  • ✅ Automatic validation (rejects invalid values like "A200")
  • ✅ Consistent with internal models

Current approach is acceptable - this is an enhancement, not a bug.

str,
typer.Option(help="GPU type to use for processing (L4 or A100)."),
] = "A100",
gpu_provisioning_mode: Annotated[
str,
typer.Option(help="GPU provisioning mode (SPOT or ON_DEMAND)."),
] = "ON_DEMAND",
max_gpus_per_slide: Annotated[
int,
typer.Option(help="Maximum number of GPUs to allocate per slide (1-8).", min=1, max=8),
] = 1,
cpu_provisioning_mode: Annotated[
str,
typer.Option(help="CPU provisioning mode (SPOT or ON_DEMAND)."),
] = "ON_DEMAND",
) -> None:
"""Prepare metadata, upload data to platform, and submit an application run, then incrementally download results.

Expand Down Expand Up @@ -401,10 +417,15 @@ def run_execute( # noqa: PLR0913, PLR0917
metadata_csv_file=metadata_csv_file,
application_version=application_version,
note=note,
tags=None,
due_date=due_date,
deadline=deadline,
onboard_to_aignostics_portal=onboard_to_aignostics_portal,
validate_only=validate_only,
gpu_type=gpu_type,
gpu_provisioning_mode=gpu_provisioning_mode,
max_gpus_per_slide=max_gpus_per_slide,
cpu_provisioning_mode=cpu_provisioning_mode,
)
result_download(
run_id=run_id,
Expand Down Expand Up @@ -652,6 +673,22 @@ def run_submit( # noqa: PLR0913, PLR0917
validate_only: Annotated[
bool, typer.Option(help="If True, cancel the run post validation, before analysis.")
] = False,
gpu_type: Annotated[
str,
typer.Option(help="GPU type to use for processing (L4 or A100)."),
] = "A100",
gpu_provisioning_mode: Annotated[
str,
typer.Option(help="GPU provisioning mode (SPOT or ON_DEMAND)."),
] = "ON_DEMAND",
max_gpus_per_slide: Annotated[
int,
typer.Option(help="Maximum number of GPUs to allocate per slide (1-8).", min=1, max=8),
] = 1,
cpu_provisioning_mode: Annotated[
str,
typer.Option(help="CPU provisioning mode (SPOT or ON_DEMAND)."),
] = "ON_DEMAND",
) -> str:
"""Submit run by referencing the metadata CSV file.

Expand Down Expand Up @@ -701,11 +738,26 @@ def run_submit( # noqa: PLR0913, PLR0917
app_version.version_number,
metadata_dict,
)

# Build custom metadata with pipeline configuration
custom_metadata = {
"pipeline": {
"gpu": {
"gpu_type": gpu_type,
"provisioning_mode": gpu_provisioning_mode,
"max_gpus_per_slide": max_gpus_per_slide,
},
"cpu": {
"provisioning_mode": cpu_provisioning_mode,
},
},
}

application_run = Service().application_run_submit_from_metadata(
application_id=application_id,
metadata=metadata_dict,
application_version=application_version,
custom_metadata=None, # TODO(Helmut): Add support for custom metadata
custom_metadata=custom_metadata,
note=note,
tags={tag.strip() for tag in tags.split(",") if tag.strip()} if tags else None,
due_date=due_date,
Expand Down
94 changes: 93 additions & 1 deletion src/aignostics/application/_gui/_page_application_describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ class SubmitForm:
deadline: str = (datetime.now().astimezone() + timedelta(hours=24)).strftime("%Y-%m-%d %H:%M")
validate_only: bool = False
onboard_to_aignostics_portal: bool = False
gpu_type: str = "A100"
gpu_provisioning_mode: str = "ON_DEMAND"
max_gpus_per_slide: int = 1
cpu_provisioning_mode: str = "ON_DEMAND"


submit_form = SubmitForm()
Expand Down Expand Up @@ -702,11 +706,25 @@ def _submit() -> None:
"""Submit the application run."""
ui.notify("Submitting application run ...", type="info")
try:
# Build custom metadata with pipeline configuration
custom_metadata = {
"pipeline": {
"gpu": {
"gpu_type": submit_form.gpu_type,
"provisioning_mode": submit_form.gpu_provisioning_mode,
"max_gpus_per_slide": submit_form.max_gpus_per_slide,
},
"cpu": {
"provisioning_mode": submit_form.cpu_provisioning_mode,
},
},
}

run = service.application_run_submit_from_metadata(
application_id=str(submit_form.application_id),
metadata=submit_form.metadata or [],
application_version=str(submit_form.application_version),
custom_metadata=None, # TODO(Helmut): Allow user to edit custom metadata
custom_metadata=custom_metadata,
note=submit_form.note,
tags=set(submit_form.tags) if submit_form.tags else None,
due_date=datetime.strptime(submit_form.due_date, "%Y-%m-%d %H:%M")
Expand Down Expand Up @@ -816,6 +834,80 @@ def _update_upload_progress() -> None:
break
_upload_ui.refresh(submit_form.metadata)

with ui.step("Pipeline"):
user_info: UserInfo | None = app.storage.tab.get("user_info", None)
can_configure_pipeline = (
user_info
and user_info.organization
and user_info.organization.name
and user_info.organization.name.lower() in {"aignostics", "pre-alpha-org", "lmu", "charite"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Security & Scalability Consideration:

Hardcoding organization names in the frontend for feature gating has maintainability implications:

user_info.organization.name.lower() in {"aignostics", "pre-alpha-org", "lmu", "charite"}

Suggestions:

  1. Backend Authorization: Move this logic to the API backend and check via a user permission/feature flag
  2. Configuration-based: Store allowed organizations in _settings.py or environment variables
  3. Feature Flag Service: Use a feature flag system for gradual rollout

Current Risk: Low (alpha feature with known orgs)
Future Risk: Medium (requires code deployment for each new organization)

Why it matters: As this feature rolls out to more organizations, changing code for each org request is not sustainable for a medical device SDK.

)

if can_configure_pipeline:
with ui.column(align_items="start").classes("w-full"):
ui.label("GPU Configuration").classes("text-h6 mb-0 pb-0")
ui.label(
"Configure GPU resources for processing your whole slide images. "
"These settings control the type and provisioning mode of GPUs used during AI analysis."
).classes("text-sm mt-0 pt-0 mb-4")

with ui.row().classes("w-full gap-4"):
ui.select(
label="GPU Type",
options={"L4": "L4", "A100": "A100"},
value=submit_form.gpu_type,
).bind_value(submit_form, "gpu_type").mark("SELECT_GPU_TYPE").classes("w-1/3")

ui.number(
label="Max GPUs per Slide",
value=submit_form.max_gpus_per_slide,
min=1,
max=8,
step=1,
).bind_value(submit_form, "max_gpus_per_slide").mark("NUMBER_MAX_GPUS_PER_SLIDE").classes(
"w-1/3"
)

ui.select(
label="GPU Provisioning Mode",
options={
"SPOT": "Spot nodes (lower cost, better availability, might be preempted and retried)",
"ON_DEMAND": (
"On demand nodes (higher cost, limited availability, processing might be delayed)"
),
},
value=submit_form.gpu_provisioning_mode,
).bind_value(submit_form, "gpu_provisioning_mode").mark("SELECT_GPU_PROVISIONING_MODE").classes(
"w-1/3"
)

ui.separator().classes("my-4")

ui.label("CPU Configuration").classes("text-h6 mb-0 pb-0")
ui.label("Configure CPU resources for algorithms that do not require GPU acceleration.").classes(
"text-sm mt-0 pt-0 mb-4"
)

with ui.row().classes("w-full gap-4"):
ui.select(
label="CPU Provisioning Mode",
options={
"SPOT": "Spot nodes (lower cost, better availability, might be preempted and retried)",
"ON_DEMAND": "On demand nodes (higher cost, limited availability, might be delayed)",
},
value=submit_form.cpu_provisioning_mode,
).bind_value(submit_form, "cpu_provisioning_mode").mark("SELECT_CPU_PROVISIONING_MODE").classes(
"w-1/2"
)
else:
ui.label(
"Pipeline configuration is not available for your organization. Default settings will be used."
).classes("text-body1")

with ui.stepper_navigation():
ui.button("Next", on_click=stepper.next).mark("BUTTON_PIPELINE_NEXT")
ui.button("Back", on_click=stepper.previous).props("flat")

with ui.step("Submit"):
_upload_ui([])
ui.timer(0.1, callback=_update_upload_progress)
Expand Down
63 changes: 62 additions & 1 deletion src/aignostics/platform/_sdk_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,76 @@
import os
import sys
from datetime import UTC, datetime
from enum import StrEnum
from typing import Any, Literal

from loguru import logger
from pydantic import BaseModel, Field, ValidationError
from pydantic import BaseModel, Field, PositiveInt, ValidationError

from aignostics.utils import user_agent

SDK_METADATA_SCHEMA_VERSION = "0.0.4"
ITEM_SDK_METADATA_SCHEMA_VERSION = "0.0.3"

# Pipeline orchestration defaults
DEFAULT_GPU_TYPE = "A100"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent Design Choice

Defining pipeline defaults as module-level constants:

  • ✅ Makes defaults explicit and discoverable
  • ✅ Easier to update across all usages
  • ✅ Testable (constants can be imported in tests)
  • ✅ Self-documenting for SDK users

This follows best practices for configuration management in enterprise software.

DEFAULT_MAX_GPUS_PER_SLIDE = 1
DEFAULT_GPU_PROVISIONING_MODE = "ON_DEMAND"
DEFAULT_CPU_PROVISIONING_MODE = "ON_DEMAND"


class GPUType(StrEnum):
    """Type of GPU to use for processing.

    Values are the literal accelerator names serialized into the run's
    custom metadata under ``pipeline.gpu.gpu_type``.
    """

    L4 = "L4"
    A100 = "A100"


class ProvisioningMode(StrEnum):
    """Provisioning mode for resources.

    Shared by GPU and CPU configuration (see GPUConfig / CPUConfig).
    """

    # Spot nodes: lower cost, but might be preempted and retried.
    SPOT = "SPOT"
    # On-demand nodes: higher cost, not subject to preemption.
    ON_DEMAND = "ON_DEMAND"


class CPUConfig(BaseModel):
    """Configuration for CPU resources.

    Used for algorithms that do not require GPU acceleration; serialized
    under ``pipeline.cpu`` in the run's custom metadata.
    """

    provisioning_mode: ProvisioningMode = Field(
        # Enum members are immutable singletons, so a plain default is safe;
        # the previous default_factory lambda added indirection for no benefit.
        default=ProvisioningMode(DEFAULT_CPU_PROVISIONING_MODE),
        description="The provisioning mode for CPU resources (SPOT or ON_DEMAND)",
    )


class GPUConfig(BaseModel):
    """Configuration for GPU resources.

    Controls the accelerator type and provisioning used during AI analysis;
    serialized under ``pipeline.gpu`` in the run's custom metadata.
    """

    gpu_type: GPUType = Field(
        # Enum members are immutable singletons, so a plain default is safe;
        # no default_factory indirection needed.
        default=GPUType(DEFAULT_GPU_TYPE),
        description="The type of GPU to use (L4 or A100)",
    )
    provisioning_mode: ProvisioningMode = Field(
        default=ProvisioningMode(DEFAULT_GPU_PROVISIONING_MODE),
        description="The provisioning mode for GPU resources (SPOT or ON_DEMAND)",
    )
    max_gpus_per_slide: int = Field(
        default=DEFAULT_MAX_GPUS_PER_SLIDE,
        ge=1,
        # CLI options and the GUI number input both cap this at 8 ("1-8");
        # enforce the same upper bound at the model level for consistency.
        le=8,
        description="The maximum number of GPUs to allocate per slide (1-8)",
    )


class PipelineConfig(BaseModel):
    """Pipeline configuration for dynamic orchestration.

    Serialized under the ``pipeline`` key of a run's custom metadata
    (see ``RunSdkMetadata.pipeline``).
    """

    # default_factory builds a fresh nested model per instance, so nested
    # defaults are never shared between PipelineConfig instances.
    gpu: GPUConfig = Field(
        default_factory=GPUConfig,
        description="GPU resource configuration",
    )
    cpu: CPUConfig = Field(
        default_factory=CPUConfig,
        description="CPU resource configuration",
    )


class SubmissionMetadata(BaseModel):
"""Metadata about how the SDK was invoked."""
Expand Down Expand Up @@ -121,6 +181,7 @@ class RunSdkMetadata(BaseModel):
note: str | None = Field(None, description="Optional user note for the run")
workflow: WorkflowMetadata | None = Field(None, description="Workflow control flags")
scheduling: SchedulingMetadata | None = Field(None, description="Scheduling information")
pipeline: PipelineConfig | None = Field(None, description="Pipeline orchestration configuration")

model_config = {"extra": "forbid"} # Reject unknown fields

Expand Down
5 changes: 0 additions & 5 deletions tests/aignostics/application/cli_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Tests to verify the CLI functionality of the application module."""

import os
import platform
import re
from datetime import UTC, datetime, timedelta
Expand Down Expand Up @@ -254,10 +253,6 @@ def test_cli_run_submit_fails_on_missing_url(runner: CliRunner, tmp_path: Path,
assert "Invalid platform bucket URL: ''" in normalize_output(result.stdout)


@pytest.mark.skipif(
os.getenv("AIGNOSTICS_PLATFORM_ENVIRONMENT", "staging") == "production",
reason="Broken when targeting production",
)
@pytest.mark.e2e
@pytest.mark.long_running
@pytest.mark.flaky(retries=3, delay=5)
Expand Down
14 changes: 14 additions & 0 deletions tests/aignostics/platform/e2e_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
from tests.constants_test import (
HETA_APPLICATION_ID,
HETA_APPLICATION_VERSION,
PIPELINE_CPU_PROVISIONING_MODE,
PIPELINE_GPU_PROVISIONING_MODE,
PIPELINE_GPU_TYPE,
PIPELINE_MAX_GPUS_PER_SLIDE,
SPOT_0_CRC32C,
SPOT_0_GS_URL,
SPOT_0_HEIGHT,
Expand Down Expand Up @@ -209,6 +213,16 @@ def _submit_and_validate( # noqa: PLR0913, PLR0917
"due_date": (datetime.now(tz=UTC) + timedelta(seconds=due_date_seconds)).isoformat(),
"deadline": deadline.isoformat(),
},
"pipeline": {
"gpu": {
"gpu_type": PIPELINE_GPU_TYPE,
"provisioning_mode": PIPELINE_GPU_PROVISIONING_MODE,
"max_gpus_per_slide": PIPELINE_MAX_GPUS_PER_SLIDE,
},
"cpu": {
"provisioning_mode": PIPELINE_CPU_PROVISIONING_MODE,
},
},
}
},
)
Expand Down
Loading
Loading