diff --git a/.claude/launch.json b/.claude/launch.json new file mode 100644 index 000000000..1bd95af54 --- /dev/null +++ b/.claude/launch.json @@ -0,0 +1,32 @@ +{ + "version": "0.0.1", + "configurations": [ + { + "name": "launchpad-production", + "runtimeExecutable": "bash", + "runtimeArgs": [ + "-c", + "AIGNOSTICS_API_ROOT=https://platform.aignostics.com uv run aignostics system serve --host 127.0.0.1 --port 8801" + ], + "port": 8801 + }, + { + "name": "launchpad-staging", + "runtimeExecutable": "bash", + "runtimeArgs": [ + "-c", + "AIGNOSTICS_API_ROOT=https://platform-staging.aignostics.com uv run aignostics system serve --host 127.0.0.1 --port 8802" + ], + "port": 8802 + }, + { + "name": "launchpad-dev", + "runtimeExecutable": "bash", + "runtimeArgs": [ + "-c", + "AIGNOSTICS_API_ROOT=https://platform-dev.aignostics.ai uv run aignostics system serve --host 127.0.0.1 --port 8803" + ], + "port": 8803 + } + ] +} diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 000000000..f12613462 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"141e42d1-2313-4aa6-8525-e62a6191f6d1","pid":79441,"procStart":"Sun Apr 26 08:58:29 2026","acquiredAt":1777200880667} diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..d6e1f23b7 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,39 @@ +{ + "permissions": { + "allow": [ + "mcp__claude_ai_Atlassian__getConfluencePage", + "mcp__claude_ai_Atlassian__getVisibleJiraProjects", + "mcp__claude_ai_Atlassian__getAccessibleAtlassianResources", + "mcp__claude_ai_Atlassian__getCompassComponent", + "mcp__claude_ai_Atlassian__getJiraIssue", + "mcp__claude_ai_Atlassian__searchJiraIssuesUsingJql", + "mcp__claude_ai_Atlassian__search", + "mcp__claude_ai_Atlassian__getConfluenceSpaces", + "mcp__claude_ai_Atlassian__getPagesInConfluenceSpace", + "mcp__claude-in-chrome__read_page", + "mcp__claude-in-chrome__get_page_text", + 
"mcp__claude-in-chrome__tabs_context_mcp", + "Bash(make lint)", + "Bash(make lint_fix)", + "Bash(make test_unit)", + "Bash(make test_integration)", + "Bash(make audit)", + "Bash(gh label list *)", + "Bash(gh search *)" + ] + }, + "enableAllProjectMcpServers": true, + "enabledMcpjsonServers": [], + "extraKnownMarketplaces": { + "aignostics-claude-plugins": { + "source": { + "source": "github", + "repo": "aignostics/claude-plugins" + }, + "autoUpdate": true + } + }, + "enabledPlugins": { + "qms@aignostics-claude-plugins": true + } +} \ No newline at end of file diff --git a/.github/CLAUDE.md b/.github/CLAUDE.md index 5f15f6cca..2b5d19bc2 100644 --- a/.github/CLAUDE.md +++ b/.github/CLAUDE.md @@ -90,6 +90,7 @@ The Aignostics Python SDK uses a **sophisticated multi-stage CI/CD pipeline** bu | Workflow | Purpose | Duration | Key Outputs | |----------|---------|----------|-------------| | **_lint.yml** | Code quality (ruff, pyright, mypy) | ~5 min | Formatted code, type safety | +| **_docs.yml** | Documentation build (Sphinx) | ~3 min | HTML docs, validation | | **_audit.yml** | Security + license compliance | ~3 min | SBOM (CycloneDX, SPDX), vulnerabilities, licenses | | **_test.yml** | Multi-stage test execution | ~15 min | Coverage reports, JUnit XML | | **_codeql.yml** | Security vulnerability scanning | ~10 min | CodeQL SARIF results | @@ -115,10 +116,12 @@ The SDK has **7 test categories** with different execution strategies. 
def test_something(): pass + # ❌ INCORRECT - No category marker, will NOT run in CI def test_something_else(): pass + # ✅ CORRECT - Multiple markers including category @pytest.mark.e2e @pytest.mark.long_running @@ -1099,6 +1102,7 @@ make dist_native | `audit-scheduled.yml` | Entry | Security audit | ~5 min | | `codeql-scheduled.yml` | Entry | CodeQL scan | ~10 min | | `_lint.yml` | Reusable | Code quality checks | ~5 min | +| `_docs.yml` | Reusable | Documentation build | ~3 min | | `_audit.yml` | Reusable | Security & license | ~3 min | | `_test.yml` | Reusable | Test execution | ~15 min | | `_codeql.yml` | Reusable | Security scanning | ~10 min | diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 61eabdb41..3e38c276a 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -96,13 +96,14 @@ make lint # Ruff formatting + MyPy type checking ```python from aignostics.utils import BaseService, Health + class Service(BaseService): def __init__(self): super().__init__(SettingsClass) # Optional settings - + def health(self) -> Health: return Health(status=Health.Code.UP) - + def info(self, mask_secrets: bool = True) -> dict: return {"version": "1.0.0"} ``` @@ -114,6 +115,7 @@ from ._service import Service cli = typer.Typer(name="module", help="Module description") + @cli.command("action") def action_command(param: str): """Command description.""" @@ -126,6 +128,7 @@ def action_command(param: str): ```python from nicegui import ui + def create_page(): ui.label("Module Interface") # Components auto-register with GUI launcher @@ -177,9 +180,10 @@ def create_page(): ```python from pydantic_settings import BaseSettings + class Settings(BaseSettings): api_root: str = "https://platform.aignostics.com" - + class Config: env_prefix = "AIGNOSTICS_" ``` @@ -200,7 +204,7 @@ client = platform.Client() run = client.runs.create( application_id="heta", application_version="1.0.0", # version number without 'v' prefix, omit 
for latest - items=[platform.InputItem(...)] + items=[platform.InputItem(...)], ) # 3. Monitor & download diff --git a/.github/labels.yml b/.github/labels.yml index f5c45cd28..b316634cd 100644 --- a/.github/labels.yml +++ b/.github/labels.yml @@ -117,6 +117,14 @@ description: Trigger Claude Code automation color: "b41d8f" +- name: claude:review:passed + description: Automated Claude PR review found no blocking issues on the current head commit + color: "0e8a16" + +- name: claude:review:failed + description: Automated Claude PR review found blocking issues on the current head commit + color: "b60205" + - name: copilot description: GitHub Copilot related color: "e6dac6" @@ -133,3 +141,76 @@ - name: documentation-drift description: Documentation out of sync with code color: "ff6b6b" + +# SOP Labels — governance trail on every PR (one mandatory) +- name: sop:pr-sop-01 + description: PR-SOP-01 Problem Resolution (bug / anomaly fix) + color: "5319e7" + +- name: sop:cc-sop-01 + description: CC-SOP-01 Change Control (feature / planned change) + color: "1d76db" + +# Type Labels — conventional-commits taxonomy (one per PR) +# Extends the legacy `bug` / `documentation` / `enhancement` labels with +# the rest of the conventional-commit vocabulary. Legacy labels remain +# for backward-compatibility with issue templates and external tooling; +# the `type:*` namespace is the source of truth for PR-level filtering. 
+- name: type:feature + description: New functionality (conventional feat) + color: "a2eeef" + +- name: type:fix + description: Bug fix (conventional fix) + color: "d73a4a" + +- name: type:chore + description: Tooling, maintenance, routine task (conventional chore) + color: "c5def5" + +- name: type:refactor + description: Refactor without behaviour change + color: "fbca04" + +- name: type:docs + description: Documentation-only change + color: "0075ca" + +- name: type:test + description: Test-only change + color: "006b75" + +- name: type:perf + description: Performance improvement + color: "4b0082" + +- name: type:build + description: Build / packaging change + color: "5319e7" + +- name: type:ci + description: CI/CD change + color: "000000" + +# Security Labels — orthogonal axis (0–2 per PR) +- name: security + description: Addresses a security advisory, CVE, or hardens security posture + color: "b60205" + +- name: security:supply-chain + description: Supply-chain (dependency) vulnerability remediation + color: "d93f0b" + +# Scope Labels — who the change affects (0–1 per PR) +- name: scope:sdk-consumers + description: Affects downstream SDK consumers (uvx aignostics / uv add aignostics) + color: "0e8a16" + +- name: scope:dev-only + description: Affects only our dev/CI env; consumers unaffected + color: "bfdadc" + +# Automation Labels +- name: auto-merge + description: Eligible for auto-merge once CI is green + color: "0e8a16" diff --git a/.github/workflows/_audit.yml b/.github/workflows/_audit.yml index 69feb9749..0cf8e1459 100644 --- a/.github/workflows/_audit.yml +++ b/.github/workflows/_audit.yml @@ -37,7 +37,7 @@ jobs: run: make audit - name: Upload audit results - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: audit-results diff --git 
a/.github/workflows/_build-native-only.yml b/.github/workflows/_build-native-only.yml index 8ea2a4944..e46e239fc 100644 --- a/.github/workflows/_build-native-only.yml +++ b/.github/workflows/_build-native-only.yml @@ -61,7 +61,7 @@ jobs: run: make dist_native - name: Upload dist_native artifacts - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: dist_native-${{ matrix.runner }} diff --git a/.github/workflows/_claude-code.yml b/.github/workflows/_claude-code.yml index 4828f1e73..322a4da1c 100644 --- a/.github/workflows/_claude-code.yml +++ b/.github/workflows/_claude-code.yml @@ -77,7 +77,7 @@ jobs: - name: Run Claude Code (Interactive Mode) if: inputs.mode == 'interactive' - uses: anthropics/claude-code-action@6e2bd52842c65e914eba5c8badd17560bd26b5de # v1.0.89 + uses: anthropics/claude-code-action@b47fd721da662d48c5680e154ad16a73ed74d2e0 # v1.0.93 with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} track_progress: ${{ inputs.track_progress }} @@ -92,7 +92,7 @@ jobs: - name: Run Claude Code (Automation Mode) if: inputs.mode == 'automation' - uses: anthropics/claude-code-action@6e2bd52842c65e914eba5c8badd17560bd26b5de # v1.0.89 + uses: anthropics/claude-code-action@b47fd721da662d48c5680e154ad16a73ed74d2e0 # v1.0.93 with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} track_progress: ${{ inputs.track_progress }} diff --git a/.github/workflows/_docker-publish.yml b/.github/workflows/_docker-publish.yml index 732ff8c9b..83a7694da 100644 --- a/.github/workflows/_docker-publish.yml +++ b/.github/workflows/_docker-publish.yml @@ -81,7 +81,7 @@ jobs: - name: "(all target): Build and push Docker image" id: build-and-push-all - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # 
v7.1.0 with: context: . file: ./Dockerfile @@ -93,7 +93,7 @@ jobs: - name: "(slim target): Build and push Docker image" id: build-and-push-slim - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 with: context: . file: ./Dockerfile diff --git a/.github/workflows/_docs.yml b/.github/workflows/_docs.yml new file mode 100644 index 000000000..01f19594f --- /dev/null +++ b/.github/workflows/_docs.yml @@ -0,0 +1,37 @@ +name: "> Docs" + +on: + workflow_call: + # No secrets needed + +jobs: + docs: + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + packages: read + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 + with: + version-file: "pyproject.toml" + enable-cache: true + cache-dependency-glob: uv.lock + + - name: Install dev tools + shell: bash + run: .github/workflows/_install_dev_tools.bash + + - name: Install Python, venv and dependencies + shell: bash + run: uv sync --all-extras --frozen --link-mode=copy + + - name: Docs + shell: bash + run: make docs diff --git a/.github/workflows/_package-publish.yml b/.github/workflows/_package-publish.yml index 16ba3e945..f2e78ebe4 100644 --- a/.github/workflows/_package-publish.yml +++ b/.github/workflows/_package-publish.yml @@ -83,7 +83,7 @@ jobs: run: make dist_native - name: Upload dist_native artifacts - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: dist_native-${{ matrix.runner }} diff --git a/.github/workflows/_scheduled-audit.yml b/.github/workflows/_scheduled-audit.yml index e18c402fa..8309325e5 100644 --- 
a/.github/workflows/_scheduled-audit.yml +++ b/.github/workflows/_scheduled-audit.yml @@ -87,7 +87,7 @@ jobs: exit $EXIT_CODE - name: Upload test results - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: test-results-scheduled diff --git a/.github/workflows/_scheduled-test-daily.yml b/.github/workflows/_scheduled-test-daily.yml index 05bc4853b..572dd4081 100644 --- a/.github/workflows/_scheduled-test-daily.yml +++ b/.github/workflows/_scheduled-test-daily.yml @@ -182,7 +182,7 @@ jobs: summary-title: All very long running e2e tests passed - name: Upload test artifacts for inspection - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: test-results-ubuntu-latest diff --git a/.github/workflows/_scheduled-test-hourly.yml b/.github/workflows/_scheduled-test-hourly.yml index 738554cfc..939366198 100644 --- a/.github/workflows/_scheduled-test-hourly.yml +++ b/.github/workflows/_scheduled-test-hourly.yml @@ -170,7 +170,7 @@ jobs: exit $EXIT_CODE - name: Upload test results - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: test-results-scheduled diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml index dbb0708fb..85c4b021b 100644 --- a/.github/workflows/_test.yml +++ b/.github/workflows/_test.yml @@ -228,7 +228,7 @@ jobs: commit-message: ${{ inputs.commit_message }} - name: Upload test artifacts for inspection - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: 
actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 if: ${{ always() && (env.GITHUB_WORKFLOW_RUNTIME != 'ACT') }} with: name: test-results-${{ matrix.runner }} diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index da162873f..6919fedc6 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -93,6 +93,19 @@ jobs: id-token: write packages: read + docs: + needs: [get-commit-message] + if: | + (!contains(needs.get-commit-message.outputs.commit_message, 'skip:ci')) && + (!contains(needs.get-commit-message.outputs.commit_message, 'build:native:only')) && + (!contains(github.event.pull_request.labels.*.name, 'skip:ci')) && + (!contains(github.event.pull_request.labels.*.name, 'build:native:only')) + uses: ./.github/workflows/_docs.yml + permissions: + contents: read + id-token: write + packages: read + audit: needs: [get-commit-message] if: | @@ -172,7 +185,7 @@ jobs: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} ketryx_report_and_check: - needs: [get-commit-message, lint, audit, test, codeql, sonarcloud] + needs: [get-commit-message, lint, audit, test, codeql, sonarcloud, docs] if: | github.actor != 'dependabot[bot]' && (!contains(needs.get-commit-message.outputs.commit_message, 'skip:ci')) && diff --git a/.github/workflows/claude-code-automation-pr-review.yml b/.github/workflows/claude-code-automation-pr-review.yml index 55d5f5929..11dc0e8a1 100644 --- a/.github/workflows/claude-code-automation-pr-review.yml +++ b/.github/workflows/claude-code-automation-pr-review.yml @@ -12,7 +12,8 @@ concurrency: jobs: claude-review: if: | - contains(github.event.pull_request.labels.*.name, 'claude') || + (github.event.action == 'labeled' && github.event.label.name == 'claude') || + (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'claude')) || github.event.action == 'ready_for_review' uses: ./.github/workflows/_claude-code.yml with: @@ -214,6 +215,49 @@ jobs: Use `gh pr comment` with 
your Bash tool to leave your comprehensive review as a comment on the PR. + ## Machine-Readable Verdict (MANDATORY) + + After posting your review comment, you MUST emit a single-label verdict on the PR. This label is consumed by branch-protection rules to gate auto-merge — it is the only deterministic signal of your review outcome. + + **Verdict criteria** (all must hold for PASS): + + - No blocking findings under "CRITICAL CHECKS" — i.e. no missing test markers, no coverage drop below 85%, no `make lint` failures, no conventional-commit violations. + - No blocking architecture or security violations under "Repository-Specific Review Areas". + - Suggestions / nice-to-haves do NOT block the verdict. + + If any blocking finding remains: verdict is **FAIL**. + Otherwise: verdict is **PASS**. + + **Apply the label** (the two labels are mutually exclusive — always remove the opposite one): + + ```bash + # PASS: + gh pr edit ${{ github.event.pull_request.number }} \ + --add-label "claude:review:passed" \ + --remove-label "claude:review:failed" + + # FAIL: + gh pr edit ${{ github.event.pull_request.number }} \ + --add-label "claude:review:failed" \ + --remove-label "claude:review:passed" + ``` + + Note: `--remove-label` is a no-op if the label is not present, so it is safe to always include it. + + Also include the verdict as the final line of your sticky review comment, formatted exactly as: + + ``` + **Verdict**: ✅ claude:review:passed + ``` + + or + + ``` + **Verdict**: ❌ claude:review:failed + ``` + + This makes the verdict visible to humans without scrolling through all findings. + --- **Remember**: This is medical device software. Insist on highest standards. Be thorough, actionable, and kind. 
diff --git a/.github/workflows/merge-release.yml b/.github/workflows/merge-release.yml index b6443909e..6e571ef38 100644 --- a/.github/workflows/merge-release.yml +++ b/.github/workflows/merge-release.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Generate GitHub App token id: app-token - uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3.0.0 + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 with: app-id: ${{ secrets.RELEASE_BOT_APP_ID }} private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }} diff --git a/.github/workflows/prepare-release.yml b/.github/workflows/prepare-release.yml index 737057847..aae309ccc 100644 --- a/.github/workflows/prepare-release.yml +++ b/.github/workflows/prepare-release.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Generate GitHub App token id: app-token - uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3.0.0 + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 with: app-id: ${{ secrets.RELEASE_BOT_APP_ID }} private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }} diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 8d79ef71a..8c91a9c61 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Generate GitHub App token id: app-token - uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3.0.0 + uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1 with: app-id: ${{ secrets.RELEASE_BOT_APP_ID }} private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }} diff --git a/.gitignore b/.gitignore index dbf4155af..d9b9b1630 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ # .gitignore of project Aignostics Python SDK +# Claude Code agent worktrees — created locally per session, never committed. 
+# Note: .claude/settings.json is intentionally tracked; only worktrees are ignored. +.claude/worktrees/ + # Environment .env .env.* @@ -86,13 +90,12 @@ CLAUDE.local.md # Scalene profile.json profile.html +scalene-profile.json +scalene-profile.html # Nicegui .nicegui -# MCP -.mcp.json - # Application specific data/** !data/.keep diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 000000000..259a95910 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "playwright": { + "command": "npx", + "args": ["@playwright/mcp@latest"] + } + } +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90a9b6dc3..53093f827 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: args: ["--baseline", ".secrets.baseline"] additional_dependencies: ["gibberish-detector"] - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.9.7 + rev: 0.11.6 # GHSA-pjjw-68hj-v9mw; matches [tool.uv] required-version hooks: - id: uv-lock - repo: local diff --git a/.vscode/extensions.json b/.vscode/extensions.json index afff625e2..56cafd1c2 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -2,8 +2,7 @@ "recommendations": [ "bierner.markdown-mermaid", "charliermarsh.ruff", - "codecov.codecov", - "daelonsuzuka.nicegui", + "daelonsuzuka.nicegui", "donjayamanne.python-environment-manager", "fill-labs.dependi", "github.vscode-github-actions", diff --git a/API_REFERENCE_v1.md b/API_REFERENCE_v1.md index d9d53c641..a3dc4d2a2 100644 --- a/API_REFERENCE_v1.md +++ b/API_REFERENCE_v1.md @@ -42,15 +42,12 @@ Base URLs: ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/applications', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/applications", headers=headers) +print(r.json()) ``` ```javascript @@ -208,15 +205,12 @@
OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/applications/{application_id}', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/applications/{application_id}", headers=headers) +print(r.json()) ``` ```javascript @@ -295,15 +289,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/applications/{application_id}/versions/{version}', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/applications/{application_id}/versions/{version}", headers=headers) +print(r.json()) ``` ```javascript @@ -571,15 +562,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/runs', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/runs", headers=headers) +print(r.json()) ``` ```javascript @@ -962,16 +950,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.post('/api/v1/runs', headers = headers) +headers = {"Content-Type": "application/json", "Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.post("/api/v1/runs", headers=headers) +print(r.json()) ``` ```javascript @@ -1188,15 +1172,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} 
-r = requests.get('/api/v1/runs/{run_id}', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/runs/{run_id}", headers=headers) +print(r.json()) ``` ```javascript @@ -1298,15 +1279,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.post('/api/v1/runs/{run_id}/cancel', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.post("/api/v1/runs/{run_id}/cancel", headers=headers) +print(r.json()) ``` ```javascript @@ -1380,15 +1358,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/runs/{run_id}/items', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/runs/{run_id}/items", headers=headers) +print(r.json()) ``` ```javascript @@ -1798,15 +1773,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/runs/{run_id}/items/{external_id}', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.get("/api/v1/runs/{run_id}/items/{external_id}", headers=headers) +print(r.json()) ``` ```javascript @@ -1899,15 +1871,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/runs/{run_id}/artifacts/{artifact_id}/file', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = 
requests.get("/api/v1/runs/{run_id}/artifacts/{artifact_id}/file", headers=headers) +print(r.json()) ``` ```javascript @@ -1983,15 +1952,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.delete('/api/v1/runs/{run_id}/artifacts', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.delete("/api/v1/runs/{run_id}/artifacts", headers=headers) +print(r.json()) ``` ```javascript @@ -2062,16 +2028,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.put('/api/v1/runs/{run_id}/custom-metadata', headers = headers) +headers = {"Content-Type": "application/json", "Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.put("/api/v1/runs/{run_id}/custom-metadata", headers=headers) +print(r.json()) ``` ```javascript @@ -2171,16 +2133,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.put('/api/v1/runs/{run_id}/items/{external_id}/custom-metadata', headers = headers) +headers = {"Content-Type": "application/json", "Accept": "application/json", "Authorization": "Bearer {access-token}"} -print(r.json()) +r = requests.put("/api/v1/runs/{run_id}/items/{external_id}/custom-metadata", headers=headers) +print(r.json()) ``` ```javascript @@ -2280,15 +2238,12 @@ OAuth2AuthorizationCodeBearer ```python import requests -headers = { - 'Accept': 'application/json', - 'Authorization': 'Bearer {access-token}' -} -r = requests.get('/api/v1/me', headers = headers) +headers = {"Accept": "application/json", "Authorization": "Bearer {access-token}"} 
-print(r.json()) +r = requests.get("/api/v1/me", headers=headers) +print(r.json()) ``` ```javascript diff --git a/CLAUDE.md b/CLAUDE.md index 34c3f8017..d0f31ad91 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,7 +32,7 @@ If you write code yourself, it is a strict requirement to validate your work on If you are creating a pull request yourself: -* Add a label skip:test_long_running, to skip running long running tests. This is important because some tests in this repository are marked as long_running and can take a significant amount of time to complete. By adding this label, you help ensure that the CI pipeline runs efficiently and avoids unnecessary delays. +* Add a label skip:test:long_running, to skip running long running tests. This is important because some tests in this repository are marked as long_running and can take a significant amount of time to complete. By adding this label, you help ensure that the CI pipeline runs efficiently and avoids unnecessary delays. ## Module Documentation Index @@ -293,10 +293,7 @@ for app in client.applications.list(): print(app.application_id) # Submit run -run = client.runs.create( - application_id="heta", - files=["slide.svs"] -) +run = client.runs.create(application_id="heta", files=["slide.svs"]) ``` ### Service Discovery Pattern @@ -464,6 +461,7 @@ This ensures the JSON Schema is automatically regenerated during documentation b ```python from aignostics.utils import BaseService, Health + class Service(BaseService): """Module service implementation.""" @@ -484,6 +482,7 @@ from ._service import Service cli = typer.Typer(name="module", help="Module description") + @cli.command("action") def action_command(param: str): """Action description.""" @@ -709,8 +708,10 @@ details = run.details() if details.output.state == RunState.TERMINATED: if details.output.termination_reason == RunTerminationReason.ALL_ITEMS_PROCESSED: print(f"✅ Run complete: {details.output.statistics.succeeded} items succeeded") - print(f"❌ Failures:
{details.output.statistics.user_error} user errors, " - f"{details.output.statistics.system_error} system errors") + print( + f"❌ Failures: {details.output.statistics.user_error} user errors, " + f"{details.output.statistics.system_error} system errors" + ) ``` See `platform/CLAUDE.md` for complete state machine diagrams and migration guide. @@ -866,10 +867,10 @@ The test suite uses pytest-xdist for parallel execution with intelligent distrib ```python # Worker factors control parallelism XDIST_WORKER_FACTOR = { - "unit": 0.0, # No parallelization (fast, no overhead needed) - "integration": 0.2, # 20% of logical CPUs - "e2e": 1.0, # 100% of logical CPUs (I/O bound) - "default": 1.0 # 100% for mixed test runs + "unit": 0.0, # No parallelization (fast, no overhead needed) + "integration": 0.2, # 20% of logical CPUs + "e2e": 1.0, # 100% of logical CPUs (I/O bound) + "default": 1.0, # 100% for mixed test runs } # Calculate workers: max(1, int(cpu_count * factor)) @@ -1009,6 +1010,7 @@ uv run pytest -m "e2e and not long_running" -v import pytest from unittest.mock import patch + @pytest.mark.unit def test_sdk_metadata_minimal(monkeypatch): """Test SDK metadata with clean environment.""" @@ -1163,7 +1165,7 @@ git push origin feat/my-feature gh pr create --title "feat: add operation caching" --body "Description..." 
# IMPORTANT: Add label to skip long-running tests -gh pr edit --add-label "skip:test_long_running" +gh pr edit --add-label "skip:test:long_running" ``` **PR triggers:** @@ -1367,7 +1369,7 @@ git commit -m "docs: update README [skip ci]" git commit -m "skip:ci: work in progress" # Add PR label to skip long-running tests -gh pr edit --add-label "skip:test_long_running" +gh pr edit --add-label "skip:test:long_running" ``` ### IDE Setup Recommendations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ddb434ded..4ca08d935 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -269,18 +269,12 @@ client = Client() custom_metadata = { "experiment_id": "exp-2025-001", "dataset_version": "v2.1", - "custom_flags": { - "enable_feature_x": True - } + "custom_flags": {"enable_feature_x": True}, } # Submit run with custom metadata # SDK metadata is automatically added under the "sdk" key -run = client.runs.submit( - application_id="your-app", - items=[...], - custom_metadata=custom_metadata -) +run = client.runs.submit(application_id="your-app", items=[...], custom_metadata=custom_metadata) ``` The SDK will merge your custom metadata with its own tracking metadata, ensuring both are included in the run submission. The SDK metadata is always placed under the `sdk` key to avoid conflicts with your custom fields. 
diff --git a/Dockerfile b/Dockerfile index caabc7768..ab056b388 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ FROM python:3.14.3-slim-trixie AS base FROM base AS builder # Copy in UV -COPY --from=ghcr.io/astral-sh/uv:0.9.18 /uv /bin/uv +COPY --from=ghcr.io/astral-sh/uv:0.11.7 /uv /bin/uv # We use the system interpreter managed by uv ENV UV_PYTHON_DOWNLOADS=0 diff --git a/Makefile b/Makefile index ad36c36ff..53e1c1bca 100644 --- a/Makefile +++ b/Makefile @@ -162,7 +162,8 @@ gui_watch: uv run runner/gui_watch.py profile: - uv run --all-extras python -m scalene runner/scalene.py + mkdir -p tmp + uv run --all-extras python -m scalene run runner/scalene.py --outfile tmp/scalene-profile.json && uv run --all-extras python -m scalene view tmp/scalene-profile.json # Signing: https://gist.github.com/bpteague/750906b9a02094e7389427d308ba1002 dist_native: diff --git a/README.md b/README.md index d5fb4c276..b728aa1cc 100644 --- a/README.md +++ b/README.md @@ -477,24 +477,24 @@ from aignostics import platform client = platform.Client() # submit an application run application_run = client.runs.submit( - application_id="test-app", - items=[ - platform.InputItem( - external_id="slide-1", - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url="", - metadata={ - "checksum_base64_crc32c": "AAAAAA==", - "resolution_mpp": 0.25, - "width_px": 1000, - "height_px": 1000, - }, - ) - ], - ), - ], + application_id="test-app", + items=[ + platform.InputItem( + external_id="slide-1", + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", + download_url="", + metadata={ + "checksum_base64_crc32c": "AAAAAA==", + "resolution_mpp": 0.25, + "width_px": 1000, + "height_px": 1000, + }, + ) + ], + ), + ], ) # wait for the results and download incrementally as they become available application_run.download_to_folder("path/to/download/folder") @@ -571,21 +571,23 @@ the latest version will be used automatically. 
Additionally, you need to define the items you want to process in the run. The input items are defined as follows: ```python -platform.InputItem( - external_id="1", - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", # defined by the application version's input artifact schema - download_url="", - metadata={ # defined by the application version's input artifact schema - "checksum_base64_crc32c": "N+LWCg==", - "resolution_mpp": 0.46499982, - "width_px": 3728, - "height_px": 3640, - }, - ) - ], -), +( + platform.InputItem( + external_id="1", + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", # defined by the application version's input artifact schema + download_url="", + metadata={ # defined by the application version's input artifact schema + "checksum_base64_crc32c": "N+LWCg==", + "resolution_mpp": 0.46499982, + "width_px": 3728, + "height_px": 3640, + }, + ) + ], + ), +) ``` For each item you want to process, you need to provide a unique `reference` diff --git a/SECURITY.md b/SECURITY.md index b4203261b..b6a044cc5 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -31,6 +31,8 @@ d. **[trivy](https://trivy.dev/latest/)**: Pre commit to GitHub scans Python dep e. **[ox.security](https://www.ox.security/)**: Monitors dependencies for vulnerabilities pre and post release on GitHub. +How we handle vulnerabilities in our Python dependency supply chain — including the default path of raising lower bounds in `pyproject.toml`, and the list of advisories we have consciously accepted because no upstream fix is available yet — is documented in [SUPPLY_CHAIN_VULNERABILITIES.md](SUPPLY_CHAIN_VULNERABILITIES.md). + ### 2. License Compliance Checks and Software Bill of Materials (SBOM) a. **[pip-licenses](https://pypi.org/project/pip-licenses/)**: Inspects and matches the licenses of all dependencies with allow list to ensure compliance with licensing requirements and avoid using components with problematic licenses. 
`licenses.csv`, `licenses.json` and `licenses_grouped.json` published [per release](https://github.com/aignostics/python-sdk/releases). diff --git a/SUPPLY_CHAIN_VULNERABILITIES.md b/SUPPLY_CHAIN_VULNERABILITIES.md new file mode 100644 index 000000000..cc14b1b30 --- /dev/null +++ b/SUPPLY_CHAIN_VULNERABILITIES.md @@ -0,0 +1,132 @@ +# Supply-Chain Vulnerabilities + +This document describes how aignostics handles vulnerabilities in its Python +dependency supply chain. + +Aignostics is consumed both as an application (`uvx aignostics`, or the +Launchpad) and as a library that users add to their own project with +`uv add aignostics`, `pip install aignostics`, or any other resolver. In +both cases the user's tooling picks dependency versions against the +dependency metadata we publish with the aignostics package — our own +lockfile and any development-only constraints we apply locally are +invisible to consumers. The only way to keep a consumer from resolving a +known-vulnerable version of a dependency is therefore to set an +appropriate lower bound on the dependency metadata we publish, including +on transitive dependencies. + +This document covers: + +- **How we protect consumers** — our default response when a scanner + reports an advisory, and the fallback path when no upstream fix is + available yet. +- **Active acceptances** — the (short) list of advisories we have + consciously accepted because no upstream fix exists or the + vulnerability is not exploitable in how aignostics uses the package. + This is where **the actual residual risk lives**: each row records + severity, scope, downstream-exposure assessment, rationale, and the + condition under which the acceptance expires. +- **Enforced lower bounds for CVE protection** — the complete catalog of + lower bounds we currently set specifically to shield consumers from + advisories that already have an upstream fix. 
+ +See [SECURITY.md](SECURITY.md) for the scanners that feed this process +(`pip-audit`, Dependabot, Renovate, trivy, ox.security). + +## How we protect consumers + +This is sharper than the default practice in the Python ecosystem, where +many libraries constrain only their direct dependencies or rely on their +own lockfile for protection. We deliberately maintain explicit lower +bounds on direct *and* transitive dependencies, annotated with the +relevant CVE / GHSA id, so both `uvx aignostics` and `uv add aignostics` +give consumers a dependency tree free of the advisories we are aware of. + +Our policy per finding: + +- **Upstream fix available** — raise the lower bound of the affected + package with an inline CVE / GHSA comment and refresh our lockfile. + The bound goes on the existing line for direct dependencies, and in + the transitive-overrides block of the same section for transitive + ones. If the package is only reachable through an optional extra the + bound goes in that extra; if the package is genuinely dev-only it goes + in the dev-group constraints. +- **No upstream fix yet, or not exploitable in our use** — add the + advisory to the "Active acceptances" table below (with severity, + downstream-exposure assessment, rationale, and a removal condition) + and suppress it in `pip-audit` so it does not block CI. We never + silently ignore a finding. + +## Active acceptances + +This is where the actual residual risk lives. Each row is an advisory for +which no upstream fix has been released yet — or for which the +vulnerability is not exploitable in how aignostics uses the package — +together with severity, exposure assessment, rationale, and the condition +under which the acceptance expires. + +- **Applies to** says when the vulnerable package reaches a consumer's + tree: *always* (every install), *with the `<name>` extra* (only consumers + who install that extra), or *dev only* (never reaches consumers; + affects our tooling only). 
+- **Downstream exposure** says whether accepting the advisory leaves + consumers at risk: *None* (e.g. the package is dev-only for us), + *Partial* (only if a specific extra is installed), or *Full* (every + consumer install inherits the vulnerable version). + +| Advisory | Package (affected) | Severity | Applies to | Downstream exposure | Published | Accepted | Revisit by | Fix status | Rationale | Removal condition | Accepted via | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| _No active acceptances. All known advisories that affect aignostics or its dependencies have an upstream fix that is reflected as an enforced lower bound below._ | — | — | — | — | — | — | — | — | — | — | — | + +"Revisit by" is a soft deadline: when it passes, we re-check whether the +upstream fix landed or whether a newer advisory superseded this one. For +dev-only entries we revisit quarterly; for runtime entries with a known +upcoming fix, monthly. + +## Enforced lower bounds for CVE protection + +Every lower bound below is set with an inline CVE / GHSA comment so it is +visible to anyone reading the project metadata. These are the constraints +that prevent downstream consumers from resolving a known-vulnerable +version. + +- **Applies to** tells you when the constraint reaches a consumer's + dependency tree: *always* (affects every `uvx aignostics` and + `uv add aignostics`), *with the `<name>` extra* (affects only consumers + who install that extra), or *dev only* (never reaches a consumer; + applies to our own tooling). +- **Severity** is the highest CVSS rating among the advisories protected + against, as reported by NVD. +- **Since** is the calendar date on which the lower bound was introduced + into `main`. 
+ +| Package | Constraint | Protects against | Severity | Applies to | Since | +| --- | --- | --- | --- | --- | --- | +| `pip` | `>=26.1` | [CVE-2025-8869](https://nvd.nist.gov/vuln/detail/CVE-2025-8869) (≥25.3); [CVE-2026-3219](https://nvd.nist.gov/vuln/detail/CVE-2026-3219) / [GHSA-58qw-9mgm-455v](https://github.com/advisories/GHSA-58qw-9mgm-455v) (≥26.1, [pypa/pip#13870](https://github.com/pypa/pip/pull/13870)) | Medium | dev only | 2026-04-01 (>=5.3 — ineffective); 2026-04-24 raised to ≥25.3; 2026-04-27 raised to ≥26.1 | +| `nicegui[native]` | `>=3.11.0,<4` | [CVE-2026-21871](https://nvd.nist.gov/vuln/detail/CVE-2026-21871), [CVE-2026-21873](https://nvd.nist.gov/vuln/detail/CVE-2026-21873), [CVE-2026-21874](https://nvd.nist.gov/vuln/detail/CVE-2026-21874) (≥3.5.0); [CVE-2026-25516](https://nvd.nist.gov/vuln/detail/CVE-2026-25516) (≥3.7.0); [CVE-2026-27156](https://nvd.nist.gov/vuln/detail/CVE-2026-27156) (≥3.8.0); [CVE-2026-33332](https://nvd.nist.gov/vuln/detail/CVE-2026-33332) (≥3.9.0); [CVE-2026-39844](https://nvd.nist.gov/vuln/detail/CVE-2026-39844) (≥3.10.0) | Medium | always | 2026-01-09 (≥3.5.0); 2026-04-24 raised to ≥3.9.0; 2026-04-26 raised to ≥3.11.0 (#531) | +| `pyjwt[crypto]` | `>=2.12.0,<3` | [CVE-2026-32597](https://nvd.nist.gov/vuln/detail/CVE-2026-32597) | High | always | 2026-04-24 | +| `requests` | `>=2.33.0,<3` | [CVE-2026-25645](https://nvd.nist.gov/vuln/detail/CVE-2026-25645) | Medium | always | 2026-03-26 | +| `urllib3` | `>=2.6.3,<3` | [CVE-2026-21441](https://nvd.nist.gov/vuln/detail/CVE-2026-21441) | Medium | always | 2026-01-08 | +| `h11` | `>=0.16.0` | [CVE-2025-43859](https://nvd.nist.gov/vuln/detail/CVE-2025-43859) | Critical | always | 2025-12-10 | +| `tornado` | `>=6.5.5` | [CVE-2025-47287](https://nvd.nist.gov/vuln/detail/CVE-2025-47287) (≥6.5.0); [GHSA-78cv-mqj4-43f7](https://github.com/advisories/GHSA-78cv-mqj4-43f7) (≥6.5.5) | High | always | 2025-12-10 (≥6.5.0); 2026-04-24 raised to ≥6.5.5 | +| `urllib3` | `>=2.5.0` | 
[CVE-2025-50181](https://nvd.nist.gov/vuln/detail/CVE-2025-50181), [CVE-2025-50182](https://nvd.nist.gov/vuln/detail/CVE-2025-50182) | Medium | always | 2025-12-10 | +| `pillow` | `>=12.2.0` | [CVE-2025-48379](https://nvd.nist.gov/vuln/detail/CVE-2025-48379) (≥11.3.0); [CVE-2026-25990](https://nvd.nist.gov/vuln/detail/CVE-2026-25990) (≥12.1.1); [CVE-2026-40192](https://nvd.nist.gov/vuln/detail/CVE-2026-40192) (≥12.2.0) | High | always | 2025-12-10 (≥11.3.0); 2026-04-24 raised to ≥12.2.0 | +| `aiohttp` | `>=3.13.4` | [CVE-2025-53643](https://nvd.nist.gov/vuln/detail/CVE-2025-53643), CVE-2025-69223..69230 (≥3.13.3); [CVE-2026-22815](https://nvd.nist.gov/vuln/detail/CVE-2026-22815) (≥3.13.4) | High | always | 2026-01-06 (≥3.13.3); 2026-04-24 raised to ≥3.13.4 | +| `starlette` | `>=0.49.1` | [CVE-2025-54121](https://nvd.nist.gov/vuln/detail/CVE-2025-54121) (≥0.47.2); [GHSA-7f5h-v6xp-fcq8](https://github.com/advisories/GHSA-7f5h-v6xp-fcq8) (≥0.49.1) | Medium | always | 2025-12-10 | +| `lxml` | `>=6.1.0` | [CVE-2026-41066](https://nvd.nist.gov/vuln/detail/CVE-2026-41066) | High | always | 2025-12-10 (≥6.0.2); 2026-04-24 raised to ≥6.1.0 | +| `filelock` | `>=3.20.3` | [CVE-2025-68146](https://nvd.nist.gov/vuln/detail/CVE-2025-68146) (≥3.20.1); [CVE-2026-22701](https://nvd.nist.gov/vuln/detail/CVE-2026-22701) (≥3.20.3) | Medium | always | 2025-12-17 (≥3.20.1); 2026-04-24 raised to ≥3.20.3 | +| `marshmallow` | `>=3.26.2` | [CVE-2025-68480](https://nvd.nist.gov/vuln/detail/CVE-2025-68480) | Medium | always | 2025-12-23 | +| `pygments` | `>=2.20.0` | [CVE-2026-4539](https://nvd.nist.gov/vuln/detail/CVE-2026-4539) | Medium | always | 2026-04-24 | +| `cryptography` | `>=46.0.7` | [CVE-2026-39892](https://nvd.nist.gov/vuln/detail/CVE-2026-39892) | Medium | always | 2026-04-24 | +| `pydicom` | `>=3.0.2` | [CVE-2026-32711](https://nvd.nist.gov/vuln/detail/CVE-2026-32711) | High | always | 2026-04-24 | +| `pyasn1` | `>=0.6.3` | 
[CVE-2026-30922](https://nvd.nist.gov/vuln/detail/CVE-2026-30922) | High | always | 2026-04-24 | +| `lxml-html-clean` | `>=0.4.4` | [CVE-2026-28348](https://nvd.nist.gov/vuln/detail/CVE-2026-28348), [CVE-2026-28350](https://nvd.nist.gov/vuln/detail/CVE-2026-28350) | Medium | always | 2026-04-24 | +| `python-multipart` | `>=0.0.26` | [CVE-2026-24486](https://nvd.nist.gov/vuln/detail/CVE-2026-24486) (≥0.0.22); [CVE-2026-40347](https://nvd.nist.gov/vuln/detail/CVE-2026-40347) (≥0.0.26) | High | always | 2026-04-24 | +| `protobuf` | `>=6.33.5` | [CVE-2026-0994](https://nvd.nist.gov/vuln/detail/CVE-2026-0994) | High | always | 2026-04-24 | +| `nbconvert` | `>=7.17.1` | [CVE-2025-53000](https://nvd.nist.gov/vuln/detail/CVE-2025-53000) (≥7.17.0); [CVE-2026-39377](https://nvd.nist.gov/vuln/detail/CVE-2026-39377), [CVE-2026-39378](https://nvd.nist.gov/vuln/detail/CVE-2026-39378) (≥7.17.1) | High | with the `jupyter` extra | 2026-04-24 (≥7.17.1) | +| `jupyter-core` | `>=5.8.1` | [CVE-2025-30167](https://nvd.nist.gov/vuln/detail/CVE-2025-30167) | High | with the `jupyter` extra | 2025-12-10 | +| `jupyterlab` | `>=4.4.9` | [CVE-2025-59842](https://nvd.nist.gov/vuln/detail/CVE-2025-59842) | Low | with the `jupyter` extra | 2025-12-10 | +| `marimo` | `>=0.23.0,<1` | [GHSA-2679-6mx9-h9xc](https://github.com/advisories/GHSA-2679-6mx9-h9xc) | Medium | with the `marimo` extra | 2026-04-24 | +| `uv` | `>=0.11.6` | [CVE-2025-54368](https://nvd.nist.gov/vuln/detail/CVE-2025-54368), [GHSA-w476-p2h3-79g9](https://github.com/advisories/GHSA-w476-p2h3-79g9), [GHSA-pqhf-p39g-3x64](https://github.com/advisories/GHSA-pqhf-p39g-3x64) (≥0.9.7); [GHSA-pjjw-68hj-v9mw](https://github.com/advisories/GHSA-pjjw-68hj-v9mw) (≥0.11.6) | Medium | dev only | 2025-12-10 (≥0.9.7); 2026-04-24 raised to ≥0.11.6 | +| `pytest` | `>=9.0.3,<10` | [CVE-2025-71176](https://nvd.nist.gov/vuln/detail/CVE-2025-71176) | Medium | dev only | 2026-04-24 | +| `virtualenv` | `>=20.36.1` | 
[pypa/virtualenv#3013](https://github.com/pypa/virtualenv/pull/3013) TOCTOU fix; bundles filelock ≥3.20.1 for [CVE-2025-68146](https://nvd.nist.gov/vuln/detail/CVE-2025-68146) | Medium | dev only | 2026-04-24 | +| `fonttools` | `>=4.60.2` | [CVE-2025-66034](https://nvd.nist.gov/vuln/detail/CVE-2025-66034) / [GHSA-768j-98cg-p3fv](https://github.com/advisories/GHSA-768j-98cg-p3fv) | Medium | dev only | 2025-12-10 | diff --git a/compass.yml b/compass.yml index 164219174..1979bc621 100644 --- a/compass.yml +++ b/compass.yml @@ -1,17 +1,45 @@ name: python-sdk id: ari:cloud:compass:fff788d2-8a2a-4c36-a884-dde2bb4a2b49:component/f65912bc-77bd-4e1f-b333-1a9e8f0ac32c/aca547e5-2577-4f9d-9aaa-cf360acc976c -description: 🔬 Python SDK providing access to the Aignostics Platform. Includes Aignostics Launchpad (Desktop Application), Aignostics CLI (Command-Line Interface), example notebooks, and Aignostics Client Library. +description: 🔬 Python SDK providing access to the Aignostics Platform. Includes + Aignostics Launchpad (Desktop Application), Aignostics CLI (Command-Line + Interface), example notebooks, and Aignostics Client Library. 
configVersion: 1 typeId: APPLICATION ownerId: ari:cloud:identity::team/b1cfb598-3138-4c2a-8b6a-19fb1078645b fields: - tier: 4 + tier: 1 + lifecycle: Active + isMonorepoProject: false links: - name: null type: REPOSITORY url: https://github.com/aignostics/python-sdk + - name: Jira Board + type: PROJECT + url: https://aignx.atlassian.net/jira/software/c/projects/PYSDK/boards/1799 + - name: Status Page + type: DASHBOARD + url: https://status.aignostics.com + - name: '#python-sdk-dev' + type: CHAT_CHANNEL + url: https://slack.com/app_redirect?channel=C098D8MH431 + - name: '#python-sdk-notifications' + type: CHAT_CHANNEL + url: https://slack.com/app_redirect?channel=C0AUPTA5QF9 + - name: '#python-sdk-announcements' + type: CHAT_CHANNEL + url: https://slack.com/app_redirect?channel=C08TN9NEY3Z + - name: Ketryx Project + type: OTHER_LINK + url: https://app.ketryx.com/projects/KXPRJ2Q4PA8AADY975SFMKF276TYV75 + - name: Sentry + type: DASHBOARD + url: https://aignostics.sentry.io/projects/python-sdk/ relationships: - DEPENDS_ON: [] + DEPENDS_ON: + - 'ari:cloud:compass:fff788d2-8a2a-4c36-a884-dde2bb4a2b49:component/f65912bc-77bd-4e1f-b333-1a9e8f0ac32c/638f89c6-24a3-48f4-90ad-1b09ef8d32c6' + - 'ari:cloud:compass:fff788d2-8a2a-4c36-a884-dde2bb4a2b49:component/f65912bc-77bd-4e1f-b333-1a9e8f0ac32c/8a5a2394-89b7-4c84-9b14-947ce2cf9fdd' + - 'ari:cloud:compass:fff788d2-8a2a-4c36-a884-dde2bb4a2b49:component/f65912bc-77bd-4e1f-b333-1a9e8f0ac32c/b2f7a1a1-112c-4d68-aa3c-7ce0c04eda2d' labels: - aignostics - atlas @@ -38,25 +66,25 @@ customFields: value: false - name: Component Key type: text - value: null + value: python-sdk - name: Deployment Branch type: text - value: null + value: release/* - name: Development Branch type: text - value: null + value: main - name: FinOps Label type: text value: null - name: Has external API type: boolean - value: false + value: true - name: Has external CLI type: boolean - value: false + value: true - name: Has external graphical UI type: boolean - 
value: false + value: true - name: Has internal API type: boolean value: false @@ -68,10 +96,13 @@ customFields: value: false - name: Is independently deployable type: boolean - value: false + value: true + - name: Is open source + type: boolean + value: true - name: Path in Repository type: text - value: null + value: / - name: Process Level type: single_select - value: null + value: 7a2a380d-da85-476f-ae5d-dfc034889b61 diff --git a/docs/partials/README_footer.md b/docs/partials/README_footer.md index 26a29209b..6c2523bf5 100644 --- a/docs/partials/README_footer.md +++ b/docs/partials/README_footer.md @@ -3,6 +3,9 @@ 1. Inspect our [security policy](https://aignostics.readthedocs.io/en/latest/security.html) with detailed documentation of checks, tools and principles. + How we handle vulnerabilities in our Python dependency supply chain is + documented in + [SUPPLY_CHAIN_VULNERABILITIES.md](https://github.com/aignostics/python-sdk/blob/main/SUPPLY_CHAIN_VULNERABILITIES.md). 1. Inspect how we achieve [operational excellence](https://aignostics.readthedocs.io/en/latest/operational_excellence.html) with information on our modern toolchain and software architecture. 
diff --git a/docs/partials/README_main.md b/docs/partials/README_main.md index 65a347f9f..9232a42bb 100644 --- a/docs/partials/README_main.md +++ b/docs/partials/README_main.md @@ -462,24 +462,24 @@ from aignostics import platform client = platform.Client() # submit an application run application_run = client.runs.submit( - application_id="test-app", - items=[ - platform.InputItem( - external_id="slide-1", - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url="", - metadata={ - "checksum_base64_crc32c": "AAAAAA==", - "resolution_mpp": 0.25, - "width_px": 1000, - "height_px": 1000, - }, - ) - ], - ), - ], + application_id="test-app", + items=[ + platform.InputItem( + external_id="slide-1", + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", + download_url="", + metadata={ + "checksum_base64_crc32c": "AAAAAA==", + "resolution_mpp": 0.25, + "width_px": 1000, + "height_px": 1000, + }, + ) + ], + ), + ], ) # wait for the results and download incrementally as they become available application_run.download_to_folder("path/to/download/folder") @@ -556,21 +556,23 @@ the latest version will be used automatically. Additionally, you need to define the items you want to process in the run. 
The input items are defined as follows: ```python -platform.InputItem( - external_id="1", - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", # defined by the application version's input artifact schema - download_url="", - metadata={ # defined by the application version's input artifact schema - "checksum_base64_crc32c": "N+LWCg==", - "resolution_mpp": 0.46499982, - "width_px": 3728, - "height_px": 3640, - }, - ) - ], -), +( + platform.InputItem( + external_id="1", + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", # defined by the application version's input artifact schema + download_url="", + metadata={ # defined by the application version's input artifact schema + "checksum_base64_crc32c": "N+LWCg==", + "resolution_mpp": 0.46499982, + "width_px": 3728, + "height_px": 3640, + }, + ) + ], + ), +) ``` For each item you want to process, you need to provide a unique `reference` diff --git a/noxfile.py b/noxfile.py index 48f2040ed..a4c51d5c4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -145,23 +145,16 @@ def audit(session: nox.Session) -> None: """Run security audit and license checks.""" _setup_venv(session) - # pip-audit to check for vulnerabilities + # pip-audit to check for vulnerabilities. + # Every --ignore-vuln entry must correspond to a row in SUPPLY_CHAIN_VULNERABILITIES.md + # with rationale, scope, downstream-exposure assessment, and removal condition. 
try: session.run( - # TODO(Helmut): Ignore pip vuln until pip achieved to build v5.3 "pip-audit", "-f", "json", "-o", "reports/vulnerabilities.json", - "--ignore-vuln", - "GHSA-4xh5-x5gv-qwph", # https://pyinstaller.org/en/stable/license.html - "--ignore-vuln", - "CVE-2025-53000", # no fix available - "--ignore-vuln", - "CVE-2025-69872", # no fix available - "--ignore-vuln", - "CVE-2026-4539", # no fix available ) except CommandFailed: _format_json_with_jq(session, "reports/vulnerabilities.json") diff --git a/pyproject.toml b/pyproject.toml index 8e990c83e..f45bff877 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ dependencies = [ # From Template "fastapi[all,standard]>=0.123.10", "humanize>=4.14.0,<5", - "nicegui[native]>=3.5.0,<4", # CVE-2026-21871, CVE-2026-21871, CVE-2026-21873, CVE-2026-21874 all require >=3.5.0 + "nicegui[native]>=3.11.0,<4", # CVE-2026-21871, CVE-2026-21873, CVE-2026-21874 (>=3.5.0); CVE-2026-25516 (>=3.7.0, #418); CVE-2026-27156 (>=3.8.0, #448); CVE-2026-33332 (>=3.9.0, #498); CVE-2026-39844 (>=3.10.0, #531). 3.11.0 fixes async event handler exception leaks and refines ValueChangeEventArguments generics. 
"packaging>=26,<27", "platformdirs>=4.5.1,<5", "psutil>=7.1.3,<8", @@ -99,19 +99,19 @@ dependencies = [ "highdicom>=0.26.1,<1; python_version < '3.14'", # transitive dependency pyjpegls not yet supporting Python 3.14 "html-sanitizer>=2.6.0,<3", "httpx>=0.28.1,<1", - "idc-index-data==23.8.1", + "idc-index-data==23.10.1", "ijson>=3.4.0.post0,<4", "jsf>=0.11.2,<1", "jsonschema[format-nongpl]>=4.25.1,<5", "loguru>=0.7.3,<1", "openslide-bin>=4.0.0.10,<5", "openslide-python>=1.4.3,<2", - "pandas>=2.3.3,<3", + "pandas>=2.3.3,<4", "platformdirs>=4.3.2,<5", "procrastinate>=3.5.3", - "fastparquet>=2025.12.0,<2026.0.0; python_version < '3.14'", - "pyarrow>=22.0.0,<23; python_version >= '3.14'", - "pyjwt[crypto]>=2.10.1,<3", + "fastparquet>=2026.3.0,<2026.4.0; python_version < '3.14'", + "pyarrow>=23.0.1,<24; python_version >= '3.14'", + "pyjwt[crypto]>=2.12.0,<3", # CVE-2026-32597 requires >=2.12.0 (Renovate #475) "python-dateutil>=2.9.0.post0,<3", # "pywebview[qt6]>=5.4,<6; sys_platform == 'linux'", "requests>=2.33.0,<3", # CVE-2026-25645 requires >= 2.33.0 @@ -125,19 +125,26 @@ dependencies = [ "urllib3>=2.6.3,<3", # CVE-2026-21441 requires >= 2.6.3 "wsidicom>=0.28.1,<1", "fastmcp>=3.2.0,<4", - # Transitive overrides + # Transitive overrides (see SUPPLY_CHAIN_VULNERABILITIES.md) # WARNING: one cannot negate or downgrade a dependency required here. use override-dependencies for that. 
"rfc3987; sys_platform == 'never'", # GPLv3 "h11>=0.16.0", # CVE-2025-43859 - "tornado>=6.5.0", # CVE-2025-47287 - "urllib3>=2.5.0", # CVE-2025-50181, CVE-2025-50182, - "pillow>=11.3.0", # CVE-2025-48379, - "aiohttp>=3.13.3", # CVE-2025-53643, CVE-2025-69223, CVE-2025-69224, CVE-2025-69228, CVE-2025-69229, CVE-2025-69230, CVE-2025-69226, CVE-2025-69227, CVE-2025-69225 + "tornado>=6.5.5", # CVE-2025-47287 (>=6.5.0); GHSA-78cv-mqj4-43f7 (>=6.5.5, Renovate #472) + "urllib3>=2.5.0", # CVE-2025-50181, CVE-2025-50182 + "pillow>=12.2.0", # CVE-2025-48379 (>=11.3.0); CVE-2026-25990 (>=12.1.1, Renovate #428); CVE-2026-40192 (>=12.2.0, Renovate #539) + "aiohttp>=3.13.4", # CVE-2025-53643, CVE-2025-69223..9 (>=3.13.3); CVE-2026-22815 (>=3.13.4, Renovate #527) "starlette>=0.47.2", # CVE-2025-54121 "starlette>=0.49.1", # GHSA-7f5h-v6xp-fcq8 - "lxml>=6.0.2", # For python 3.14 pre-built wheels - "filelock>=3.20.1", # CVE-2025-68146 + "lxml>=6.1.0", # CVE-2026-41066 (Renovate #556); also required for python 3.14 pre-built wheels + "filelock>=3.20.3", # CVE-2025-68146 (>=3.20.1); CVE-2026-22701 (>=3.20.3, Renovate #387) "marshmallow>=3.26.2", # CVE-2025-68480 + "pygments>=2.20.0", # CVE-2026-4539 (>=2.20.0); transitive via rich + "cryptography>=46.0.7", # CVE-2026-39892 (>=46.0.7); transitive via pyjwt[crypto] + "pydicom>=3.0.2", # CVE-2026-32711 (>=3.0.2); transitive via dicomweb-client/wsidicom/highdicom + "pyasn1>=0.6.3", # CVE-2026-30922 (>=0.6.3); transitive via cryptography + "lxml-html-clean>=0.4.4", # CVE-2026-28348, CVE-2026-28350 (>=0.4.4); transitive via html-sanitizer + "python-multipart>=0.0.26", # CVE-2026-24486 (>=0.0.22), CVE-2026-40347 (>=0.0.26); transitive via fastapi/starlette + "protobuf>=6.33.5", # CVE-2026-0994 (>=6.33.5); transitive via google-cloud-storage/sentry-sdk ] [project.optional-dependencies] @@ -148,11 +155,12 @@ jupyter = [ # WARNING: one cannot negate or downgrade a dependency required here. use override-dependencies for that. 
"jupyter-core>=5.8.1", # CVE-2025-30167 "jupyterlab>=4.4.9", # CVE-2025-59842 + "nbconvert>=7.17.1", # CVE-2025-53000 (>=7.17.0, Dependabot #424); CVE-2026-39377, CVE-2026-39378 (>=7.17.1, Dependabot #553) ] marimo = [ "cloudpathlib>=0.23.0,<1", "ipython>=9.8.0,<10", - "marimo>=0.18.4,<1", + "marimo>=0.23.0,<1", # GHSA-2679-6mx9-h9xc (Renovate #533) "matplotlib>=3.10.7,<4", "shapely>=2.1.0,<3", ] @@ -167,15 +175,16 @@ dev = [ "furo>=2025.9.25", "git-cliff>=2.10.1,<3", "mypy>=1.19.0,<2", - "myst-parser>=4.0.1,<5", + "myst-parser>=5,<6", "nox[uv]>=2025.11.12", "pip-audit>=2.10.0,<3", "pip-licenses @ git+https://github.com/neXenio/pip-licenses.git@master", # https://github.com/raimon49/pip-licenses/pull/224 "pre-commit>=4.5.0,<5", "pyright>=1.1.408,<1.1.409", # Regression in 1.1.407, see https://github.com/microsoft/pyright/issues/11060 - "pytest>=9.0.2,<10", + "pytest>=9.0.3,<10", # CVE-2025-71176 requires >=9.0.3 (Renovate #538) "pytest-asyncio>=1.3.0,<2", "pytest-cov>=7.0.0,<8", + "coverage>=7.12.0,<7.13", # Pin: coverage 7.13.x introduced a regression where pytest-cov fails with "INTERNALERROR: Expected current collector to be ..." when tests spawn subprocesses (e.g., marimo server). Re-evaluate when coverage releases a fix. "pytest-docker>=3.2.5,<4", "pytest-durations>=1.6.1,<2", "pytest-env>=1.2.0,<2", @@ -189,7 +198,7 @@ dev = [ "pytest-xdist[psutil]>=3.8.0,<4", "ruff>=0.14.8,<1", "scalene>=2.0.1,<3", - "sphinx>=8.2.3,<9", + "sphinx>=8.2.3,<9", # Tightened from <10: sphinx-toolbox 4.1.2 imports `logger` from `sphinx.ext.autodoc`, which Sphinx 9 removed. Re-evaluate when sphinx-toolbox ships Sphinx 9 support. 
"sphinx-autobuild>=2025.8.25,<2026", "sphinx-click>=6.2.0,<7", "sphinx-copybutton>=0.5.2,<1", @@ -200,20 +209,21 @@ dev = [ "sphinx-toolbox>=4.1.0,<5", "sphinxcontrib.mermaid", # https://github.com/mgaitan/sphinxcontrib-mermaid "sphinxext.opengraph>=0.9.1,<1", - "swagger-plugin-for-sphinx>=6.1.0,<7", + "swagger-plugin-for-sphinx>=7,<8", "tomli>=2.3.0,<3", "types-pyyaml>=6.0.12.20250915,<7", "types-requests>=2.32.4.20250913,<3", "watchdog>=6.0.0,<7", # Transitive overrides # WARNING: one cannot negate or downgrade a dependency required here. use override-dependencies for that. - "pip>=26.1", # CVE-2025-8869 - "uv>=0.9.7", # CVE-2025-54368, GHSA-w476-p2h3-79g9, GHSA-pqhf-p39g-3x64 + "pip>=26.1", # CVE-2025-8869 (Medium, >=25.3); CVE-2026-3219 (Medium, >=26.1, released 2026-04-26 via pypa/pip#13870) + "uv>=0.11.6", # CVE-2025-54368, GHSA-w476-p2h3-79g9, GHSA-pqhf-p39g-3x64 (>=0.9.7); GHSA-pjjw-68hj-v9mw (>=0.11.6, Renovate #536) "fonttools>=4.60.2", # CVE-2025-66034 (GHSA-768j-98cg-p3fv), dep of matplotlib + "virtualenv>=20.36.1", # pypa/virtualenv#3013 TOCTOU in app_data/lock dir; bundles filelock>=3.20.1 for CVE-2025-68146; transitive via nox/pre-commit ] [tool.uv] -required-version = ">=0.9.7" # CVE-2025-54368, GHSA-w476-p2h3-79g9, GHSA-pqhf-p39g-3x64 +required-version = ">=0.11.6" # CVE-2025-54368, GHSA-w476-p2h3-79g9, GHSA-pqhf-p39g-3x64, GHSA-pjjw-68hj-v9mw # WARNING: override-dependencies is *not* respected by uvx override-dependencies = [ # https://github.com/astral-sh/uv/issues/4422 "pytest>=9.0.1", # pytest-md-report depends on pytest<9 unnecessarily @@ -290,6 +300,9 @@ ignore = [ ] [tool.ruff.lint.per-file-ignores] +"**/__init__.py" = [ + "RUF067", # __init__ modules may contain initialization logic beyond re-exports +] "**/tests/**/*.py" = [ # we are more relaxed in tests, while sill applying hundreds of rules "S101", # asserts allowed in tests... 
@@ -311,6 +324,7 @@ ignore = [ "ASYNC230", # async functions should not open files with blocking methods like `open` "S104", # bind to all ports "S607", # subprocess with partial path + "RUF069", # float equality comparisons in tests are intentional ] [tool.ruff.format] diff --git a/pyrightconfig.json b/pyrightconfig.json index 612b268a0..b6a233da8 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -3,6 +3,7 @@ "exclude": [ "**/.nox/**", "**/.venv/**", + "**/.claude/worktrees/**", "**/dist-packages/**", "**/dist_vercel/.vercel/**", "**/dist_native/**", diff --git a/renovate.json b/renovate.json index 2495efd38..c0cb2b2de 100644 --- a/renovate.json +++ b/renovate.json @@ -22,6 +22,24 @@ "schedule": ["before 5am on monday"] }, "packageRules": [ + { + "description": "Runtime-critical packages whose bumps touch the GUI event loop, HTTP client, or pydantic models — regressions in these only surface end-to-end (closure lifecycles, event handler exception swallowing, request retry semantics, ...). Override the global labels to drop `skip:test:long_running` so the long-running e2e matrix runs and catches integration regressions on every bump. 
Discovered while shipping nicegui 3.10/3.11 (#531) where a silently-swallowed AssertionError inside an async click handler hung a download dialog and was only caught by the long-running suite.", + "matchPackageNames": [ + "nicegui", + "fastapi", + "starlette", + "httpx", + "pydantic", + "pydantic-settings", + "uvicorn" + ], + "labels": [ + "bot", + "renovate", + "dependencies", + "skip:codecov" + ] + }, { "groupName": "minor and patch dependencies", "matchManagers": ["pep621"], diff --git a/requirements/SWR_SYSTEM_GUI_STATUS_PAGE_1.md b/requirements/SWR_SYSTEM_GUI_STATUS_PAGE_1.md new file mode 100644 index 000000000..8da62c7d8 --- /dev/null +++ b/requirements/SWR_SYSTEM_GUI_STATUS_PAGE_1.md @@ -0,0 +1,23 @@ +--- +itemId: SWR-SYSTEM-GUI-STATUS-PAGE-1 +itemTitle: Per-Environment Betterstack Status Page in Launchpad +itemHasParent: SHR-SYSTEM-1 +itemType: Requirement +Requirement type: FUNCTIONAL +Module: System +Layer: GUI +--- + +As a Launchpad user, I expect the embedded Betterstack status badge and the "Check Platform Status" link to reflect only the Aignostics Platform environment my Launchpad is connected to (as configured by `AIGNOSTICS_API_ROOT`), so that I can assess the operational health of the services I actually depend on without being distracted by, or misled by, the health of unrelated environments. 
+ +The Launchpad shall resolve the public Betterstack status page URL from the configured platform environment as follows: + +- when connected to the production environment (`AIGNOSTICS_API_ROOT` = `https://platform.aignostics.com`), the Launchpad shall embed the badge of, and link to, `https://status.platform.aignostics.com`; +- when connected to the staging environment (`AIGNOSTICS_API_ROOT` = `https://platform-staging.aignostics.com`), the Launchpad shall embed the badge of, and link to, `https://status.platform-staging.aignostics.com`; +- when connected to a dev or test environment, or to any other environment for which no per-environment public Betterstack status page is configured, the Launchpad shall not render the Betterstack badge in its footer and shall not render the "Check Platform Status" item in its right-side menu. + +The user shall be able to override the resolved status page URL through the `AIGNOSTICS_STATUS_PAGE_URL` environment variable or the equivalent constructor argument, including overriding it to an empty value to suppress the badge and link. + +The Launchpad shall validate any user-supplied status page URL at configuration time and reject values that are not well-formed http(s) URLs or that contain characters that could break out of an HTML attribute when rendered (`"`, `'`, `<`, `>`, backtick, backslash, whitespace), so that the Launchpad cannot be tricked into rendering attacker-controlled markup through this configuration. + +When the Launchpad does not render the badge or the menu link, no degraded-state placeholder is shown — both surfaces are simply omitted from the layout. 
diff --git a/specifications/SPEC-APPLICATION-SERVICE.md b/specifications/SPEC-APPLICATION-SERVICE.md index 8ea8d07de..918c7c820 100644 --- a/specifications/SPEC-APPLICATION-SERVICE.md +++ b/specifications/SPEC-APPLICATION-SERVICE.md @@ -343,11 +343,9 @@ class Service: RuntimeError: When submission fails """ pass + def application_run_download( - self, - run_id: str, - output_dir: Path, - progress_callback: Optional[Callable] = None + self, run_id: str, output_dir: Path, progress_callback: Optional[Callable] = None ) -> DownloadProgress: """Download results with progress tracking diff --git a/specifications/SPEC-BUCKET-SERVICE.md b/specifications/SPEC-BUCKET-SERVICE.md index 7dcfd401a..5b311bf48 100644 --- a/specifications/SPEC-BUCKET-SERVICE.md +++ b/specifications/SPEC-BUCKET-SERVICE.md @@ -178,8 +178,9 @@ graph LR class Service(BaseService): """Bucket service for S3-compatible cloud storage operations.""" - def upload(self, source_path: Path, destination_prefix: str, - callback: Callable[[int, Path], None] | None = None) -> dict[str, list[str]]: + def upload( + self, source_path: Path, destination_prefix: str, callback: Callable[[int, Path], None] | None = None + ) -> dict[str, list[str]]: """Upload file or directory to cloud storage. Args: @@ -195,10 +196,13 @@ class Service(BaseService): BotoClientError: S3 API operation failure """ - def download(self, what: list[str] | None = None, - destination: Path = get_user_data_directory("bucket_downloads"), - what_is_key: bool = False, - progress_callback: Callable[[DownloadProgress], None] | None = None) -> DownloadResult: + def download( + self, + what: list[str] | None = None, + destination: Path = get_user_data_directory("bucket_downloads"), + what_is_key: bool = False, + progress_callback: Callable[[DownloadProgress], None] | None = None, + ) -> DownloadResult: """Download files from cloud storage with optional pattern matching. 
Args: @@ -215,8 +219,7 @@ class Service(BaseService): BotoClientError: S3 API operation failure """ - def delete(self, what: list[str] | None, what_is_key: bool = False, - dry_run: bool = True) -> int: + def delete(self, what: list[str] | None, what_is_key: bool = False, dry_run: bool = True) -> int: """Delete objects from cloud storage. Args: diff --git a/specifications/SPEC-LAUNCHPAD-STATUS-PAGE.md b/specifications/SPEC-LAUNCHPAD-STATUS-PAGE.md new file mode 100644 index 000000000..1dd1a1c03 --- /dev/null +++ b/specifications/SPEC-LAUNCHPAD-STATUS-PAGE.md @@ -0,0 +1,194 @@ +--- +itemId: SPEC-LAUNCHPAD-STATUS-PAGE +itemTitle: Per-Environment Betterstack Status Page in Launchpad +itemType: Software Item Spec +itemFulfills: SWR-SYSTEM-GUI-STATUS-PAGE-1 +itemIsRelatedTo: SPEC_GUI_SERVICE, SPEC_PLATFORM_SERVICE, SPEC_SYSTEM_SERVICE +Module: System +Layer: GUI / Platform Service +Version: 1.0.0 +Date: 2026-04-26 +--- + +## 1. Description + +### 1.1 Purpose + +This specification describes how the Aignostics Launchpad (Desktop Application, NiceGUI-based) renders the embedded Betterstack status badge in its footer and the "Check Platform Status" link in its right-side menu so that both reflect only the Aignostics Platform environment the Launchpad is currently connected to (i.e., the environment selected by `AIGNOSTICS_API_ROOT`). + +The motivation is that the legacy aggregate page at `https://status.aignostics.com` covers production *and* staging *and* unrelated services (Console, Portal, Career Site, Website). A user running the Launchpad against a single environment is best served by the corresponding **narrower** Betterstack property of that same environment, with no badge or link rendered when no per-environment Betterstack property exists (dev, test, or unknown environments). + +### 1.2 Functional Requirements + +The Launchpad shall: + +- **[FR-01]** Resolve the public Betterstack status page URL from the configured `api_root` of the platform `Settings` model. 
+- **[FR-02]** Use `https://status.platform.aignostics.com` for production (`https://platform.aignostics.com`) and `https://status.platform-staging.aignostics.com` for staging (`https://platform-staging.aignostics.com`). +- **[FR-03]** Use `None` (i.e., no public per-environment status page) for the dev environment (`https://platform-dev.aignostics.ai`) and the test environment (`https://platform-test.aignostics.ai`), and for any unknown `api_root` whose auth fields are otherwise fully provided. +- **[FR-04]** Allow the user to override the resolved value through the `AIGNOSTICS_STATUS_PAGE_URL` environment variable or the `status_page_url` constructor argument of `Settings`. An empty string is treated as `None`. +- **[FR-05]** Validate the resolved value at `Settings` construction time, rejecting values that are not well-formed http(s) URLs and values that contain `"`, `'`, `<`, `>`, backtick, backslash, or whitespace characters. +- **[FR-06]** When the resolved value is non-`None`, render the Betterstack badge in the footer (as a 250×30 iframe pointing at `<status_page_url>/badge?theme=dark`) and a "Check Platform Status" link in the right-side menu pointing at `<status_page_url>`. +- **[FR-07]** When the resolved value is `None`, omit the Betterstack badge from the footer and omit the "Check Platform Status" item from the right-side menu — no degraded-state placeholder is rendered. +- **[FR-08]** Refresh the Betterstack iframe every 30 seconds (in alignment with the existing health-update interval), guarded so the refresh is a safe no-op when the iframe is absent from the DOM. + +### 1.3 Non-Functional Requirements + +- **Security**: User-controlled values must not be able to inject markup into the Launchpad webview. 
Defence-in-depth: (1) `Settings.status_page_url` is validated by `_validate_optional_url` before reaching the GUI layer; (2) the iframe is rendered via NiceGUI's `ui.element('iframe')` with attributes assigned through the props dict, so attribute values flow through Vue data binding rather than raw HTML construction. +- **Backwards compatibility**: An unknown `api_root` (with all auth fields provided) must produce a safe default (`None`, no badge, no link) rather than raising an error. The aggregate `https://status.aignostics.com` page must remain unchanged and reachable for users who navigate to it directly. +- **Resilience**: The 30-second iframe-refresh JS must remain safe when the iframe is absent from the DOM (dev/test or override-to-`None` cases). The behaviour shall not depend on the order in which the timer first fires relative to first DOM mount. + +### 1.4 Constraints and Limitations + +- The dev and test environments do not currently have a dedicated public Betterstack property; this specification deliberately treats that as a "no badge, no link" state, not an error. +- The `Settings` `pre_init` model validator returns early when all auth fields are explicitly provided. In that path, the per-environment match block is skipped, and `status_page_url` retains its declared default (`None`) unless the caller supplied it explicitly. + +--- + +## 2. Architecture and Design + +### 2.1 Files Touched + +| File | Role | +| --- | --- | +| `src/aignostics/platform/_constants.py` | Per-environment URL constants `STATUS_PAGE_URL_DEV`, `STATUS_PAGE_URL_TEST`, `STATUS_PAGE_URL_STAGING`, `STATUS_PAGE_URL_PRODUCTION`. | +| `src/aignostics/platform/_settings.py` | `Settings.status_page_url: str \| None` field with `BeforeValidator(_validate_optional_url)`; resolution inside the existing `pre_init` `match...case` block alongside the auth endpoints; helper `_validate_optional_url(value: str \| None) -> str \| None`. 
| +| `src/aignostics/platform/__init__.py` | Re-exports the four `STATUS_PAGE_URL_*` constants for downstream consumers. | +| `src/aignostics/gui/_frame.py` | Reads `settings().status_page_url` once after the context manager `yield`. Conditionally renders the right-menu "Check Platform Status" item, the footer iframe, and the 30-s refresh JS based on this value. Defensive JS element guard `if (iframe) { iframe.src = iframe.src; }` so the refresh never throws when the iframe is absent. | +| `tests/aignostics/platform/settings_test.py` | Per-environment assertions on `status_page_url` and parametrised rejection of invalid/unsafe URLs. | + +### 2.2 Resolution Algorithm + +```text +input: api_root (string), explicit overrides (env var, constructor argument) +output: status_page_url: str | None + +1. If the user provided `status_page_url` explicitly (constructor arg or + `AIGNOSTICS_STATUS_PAGE_URL` env var): + → run `_validate_optional_url`; on success use that value. +2. Else, in the existing `pre_init` `match...case`: + - api_root == API_ROOT_DEV → setdefault to STATUS_PAGE_URL_DEV (None) + - api_root == API_ROOT_TEST → setdefault to STATUS_PAGE_URL_TEST (None) + - api_root == API_ROOT_STAGING → setdefault to STATUS_PAGE_URL_STAGING + - api_root == API_ROOT_PRODUCTION → setdefault to STATUS_PAGE_URL_PRODUCTION + - any other api_root with all auth fields supplied: + → field default applies (None) + - any other api_root without auth fields: + → ValueError UNKNOWN_ENDPOINT_URL +``` + +### 2.3 Validation + +`_validate_optional_url(value: str | None) -> str | None` is registered as a Pydantic `BeforeValidator` on the field: + +1. `None` → `None`. +2. `""` → `None` (env-var loaders may produce an empty string when the variable is set but empty; treating it as `None` matches the dev/test default). +3. Non-empty string: + 1. 
Reject if it contains any of `"`, `'`, `<`, `>`, backtick, backslash, or whitespace (RFC 3986 requires those to be percent-encoded; raw forms are either malformed or an injection attempt). + 2. Otherwise, delegate to the existing `_validate_url` (scheme must be `http` or `https`; netloc must be non-empty). + +### 2.4 Rendering + +In `gui/_frame.py`: + +```python +status_page_url = settings().status_page_url # resolved once, reused + +if status_page_url: + # right-menu: "Check Platform Status" item with ui.link(...) + +if status_page_url: + # footer: NiceGUI iframe element, attributes via props dict (no raw HTML) + iframe = ui.element("iframe") + iframe.props["id"] = "betterstack" + iframe.props["src"] = urljoin(status_page_url + "/", "badge?theme=dark") + iframe.props["width"] = "250" + iframe.props["height"] = "30" + iframe.props["frameborder"] = "0" + iframe.props["scrolling"] = "no" + iframe.style("color-scheme: dark; margin-left: 0px;") + +# 30-s refresh, runs unconditionally; element existence is guarded in JS. +ui.run_javascript( + "var iframe = document.getElementById('betterstack');" + "if (iframe) { iframe.src = iframe.src; }" +) +``` + +The iframe is rendered as a NiceGUI `ui.element('iframe')` rather than `ui.html('', - sanitize=False, - ).style("margin-left: 0px;") - ui.tooltip("Check Platform Status") + if status_page_url: # pragma: no branch — covered by smoke test, conditional render + # Render the Betterstack badge as a NiceGUI iframe element. Attributes are + # assigned via the props dict (not via an HTML/string template) so NiceGUI's + # Vue data binding handles attribute escaping. Together with the http(s)-only + # validation in Settings (`_validate_optional_url`), this removes the XSS + # surface that a sanitize=False `ui.html()` would otherwise expose. 
+ with ui.row().style("padding: 0"): + iframe = ui.element("iframe") + iframe.props["id"] = "betterstack" + iframe.props["src"] = urljoin(status_page_url + "/", "badge?theme=dark") + iframe.props["width"] = "250" + iframe.props["height"] = "30" + iframe.props["frameborder"] = "0" + iframe.props["scrolling"] = "no" + iframe.style("color-scheme: dark; margin-left: 0px;") + ui.tooltip("Check Platform Status") ui.space() with ui.row(): flavor = " (native)" if getattr(sys, "frozen", False) else "" diff --git a/src/aignostics/notebook/CLAUDE.md b/src/aignostics/notebook/CLAUDE.md index 882fd94f9..9af048f6f 100644 --- a/src/aignostics/notebook/CLAUDE.md +++ b/src/aignostics/notebook/CLAUDE.md @@ -66,30 +66,16 @@ Start → Monitor Output → Extract URL → Ready ```python MARIMO_SERVER_STARTUP_TIMEOUT = 60 # seconds + def start(self, notebook_path: Path, host: str, port: int): """Start Marimo server subprocess.""" - cmd = [ - sys.executable, "-m", "marimo", - "run", str(notebook_path), - "--host", host, - "--port", str(port), - "--headless" - ] - - self._marimo_server = Popen( - cmd, - stdout=PIPE, - stderr=STDOUT, - text=True, - creationflags=SUBPROCESS_CREATION_FLAGS - ) + cmd = [sys.executable, "-m", "marimo", "run", str(notebook_path), "--host", host, "--port", str(port), "--headless"] + + self._marimo_server = Popen(cmd, stdout=PIPE, stderr=STDOUT, text=True, creationflags=SUBPROCESS_CREATION_FLAGS) # Monitor thread watches for server URL - self._monitor_thread = Thread( - target=self._monitor_output, - daemon=True - ) + self._monitor_thread = Thread(target=self._monitor_output, daemon=True) ``` **URL Extraction Pattern:** @@ -115,12 +101,8 @@ def health(self) -> Health: """Check server and monitor thread health.""" components = { - "marimo_server": Health( - status=Health.Code.UP if self.is_marimo_server_running() else Health.Code.DOWN - ), - "monitor_thread": Health( - status=Health.Code.UP if self.is_monitor_thread_alive() else Health.Code.DOWN - ) + "marimo_server": 
Health(status=Health.Code.UP if self.is_marimo_server_running() else Health.Code.DOWN), + "monitor_thread": Health(status=Health.Code.UP if self.is_monitor_thread_alive() else Health.Code.DOWN), } return Health(status=Health.Code.UP, components=components) @@ -138,11 +120,7 @@ service = Service() # Start Marimo server notebook = Path("analysis.marimo.py") -server_url = service.start_notebook( - notebook_path=notebook, - host="127.0.0.1", - port=8080 -) +server_url = service.start_notebook(notebook_path=notebook, host="127.0.0.1", port=8080) # Server URL available after startup print(f"Notebook running at: {server_url}") @@ -163,6 +141,7 @@ The notebook module integrates with the main GUI launchpad: # In GUI context from aignostics.notebook._gui import create_notebook_interface + def setup_notebook_tab(ui): """Add notebook tab to GUI.""" @@ -208,16 +187,9 @@ def setup_notebook_tab(ui): **Logging Patterns:** ```python -logger.debug("Starting Marimo server", extra={ - "notebook": str(notebook_path), - "host": host, - "port": port -}) - -logger.warning("Server startup timeout", extra={ - "timeout": MARIMO_SERVER_STARTUP_TIMEOUT, - "output": self._output -}) +logger.debug("Starting Marimo server", extra={"notebook": str(notebook_path), "host": host, "port": port}) + +logger.warning("Server startup timeout", extra={"timeout": MARIMO_SERVER_STARTUP_TIMEOUT, "output": self._output}) ``` ## Common Pitfalls & Solutions @@ -232,9 +204,10 @@ logger.warning("Server startup timeout", extra={ def find_free_port(start=8080, end=9000): """Find available port.""" import socket + for port in range(start, end): with socket.socket() as s: - if s.connect_ex(('127.0.0.1', port)) != 0: + if s.connect_ex(("127.0.0.1", port)) != 0: return port raise RuntimeError("No free ports") ``` @@ -265,8 +238,9 @@ if not notebook_path.exists(): def cleanup_zombie_processes(): """Kill any lingering Marimo processes.""" import psutil - for proc in psutil.process_iter(['pid', 'name', 'cmdline']): - if 
'marimo' in proc.info['cmdline']: + + for proc in psutil.process_iter(["pid", "name", "cmdline"]): + if "marimo" in proc.info["cmdline"]: proc.terminate() ``` @@ -317,6 +291,7 @@ def mock_marimo_server(): mock.return_value = process yield mock + def test_server_startup(mock_marimo_server): """Test server starts and extracts URL.""" service = Service() diff --git a/src/aignostics/platform/CLAUDE.md b/src/aignostics/platform/CLAUDE.md index b4ce746cc..6cb69dd76 100644 --- a/src/aignostics/platform/CLAUDE.md +++ b/src/aignostics/platform/CLAUDE.md @@ -116,23 +116,19 @@ class Client: if app.application_id == application_id: return app raise NotFoundException - - def application_version(self, application_id: str, - version_number: str | None = None) -> ApplicationVersion: + + def application_version(self, application_id: str, version_number: str | None = None) -> ApplicationVersion: """Get application version details. - + Args: application_id: The ID of the application (e.g., 'heta') version_number: The semantic version number (e.g., '1.0.0') If None, returns the latest version - + Returns: ApplicationVersion with application_id and version_number attributes """ - return Versions(self._api).details( - application_id=application_id, - application_version=version_number - ) + return Versions(self._api).details(application_id=application_id, application_version=version_number) ``` ### Authentication Flow (`_authentication.py`) @@ -190,6 +186,7 @@ LIST_APPLICATION_RUNS_MIN_PAGE_SIZE = 5 # In resources/utils.py PAGE_SIZE = 20 # Default for general pagination + def paginate(func, *args, page_size=PAGE_SIZE, **kwargs): """Generic pagination helper.""" page = 1 @@ -209,15 +206,15 @@ class Runs: self, application_id: str | None = None, application_version: str | None = None, - page_size: int = LIST_APPLICATION_RUNS_MAX_PAGE_SIZE + page_size: int = LIST_APPLICATION_RUNS_MAX_PAGE_SIZE, ): """List runs with pagination. 
- + Args: application_id: Optional filter by application ID application_version: Optional filter by version number (not version_id) page_size: Number of results per page (max 100) - + Returns: Iterator[Run] Iterator of Run instances """ @@ -278,21 +275,27 @@ class Runs: ```python # From _sdk_metadata.py (actual implementation) + class SubmissionMetadata(BaseModel): """Metadata about how the SDK was invoked.""" + date: str # ISO 8601 timestamp interface: Literal["script", "cli", "launchpad"] # How SDK was accessed source: Literal["user", "test", "bridge"] # Who initiated the run + class UserMetadata(BaseModel): """User information metadata.""" + organization_id: str organization_name: str user_email: str user_id: str + class GitHubCIMetadata(BaseModel): """GitHub Actions CI metadata.""" + action: str | None job: str | None ref: str | None @@ -309,27 +312,37 @@ class GitHubCIMetadata(BaseModel): workflow: str | None workflow_ref: str | None + class PytestCIMetadata(BaseModel): """Pytest test execution metadata.""" + current_test: str # Test name being executed markers: list[str] | None # Pytest markers applied + class CIMetadata(BaseModel): """CI/CD environment metadata.""" + github: GitHubCIMetadata | None pytest: PytestCIMetadata | None + class WorkflowMetadata(BaseModel): """Workflow control metadata.""" + onboard_to_aignostics_portal: bool = False + class SchedulingMetadata(BaseModel): """Scheduling metadata for run execution.""" + due_date: str | None # ISO 8601, requested completion time deadline: str | None # ISO 8601, hard deadline + class RunSdkMetadata(BaseModel): """Complete Run SDK metadata schema.""" + schema_version: str # Currently "0.0.4" created_at: str # ISO 8601 timestamp - NEW updated_at: str # ISO 8601 timestamp - NEW @@ -344,14 +357,18 @@ class RunSdkMetadata(BaseModel): model_config = {"extra": "forbid"} # Strict validation + class PlatformBucketMetadata(BaseModel): """Platform bucket storage metadata for items - NEW""" + bucket_name: str # 
Name of the cloud storage bucket object_key: str # Object key/path within the bucket signed_download_url: str # Signed URL for downloading + class ItemSdkMetadata(BaseModel): """Complete Item SDK metadata schema - NEW""" + schema_version: str # Currently "0.0.3" created_at: str # ISO 8601 timestamp updated_at: str # ISO 8601 timestamp @@ -440,7 +457,7 @@ def build_run_sdk_metadata(existing_metadata: dict[str, Any] | None = None) -> d metadata["ci"] = metadata.get("ci", {}) metadata["ci"]["pytest"] = { "current_test": os.environ["PYTEST_CURRENT_TEST"], - "markers": os.environ.get("PYTEST_MARKERS", "").split(",") + "markers": os.environ.get("PYTEST_MARKERS", "").split(","), } return metadata @@ -451,6 +468,7 @@ def build_run_sdk_metadata(existing_metadata: dict[str, Any] | None = None) -> d ```python # From resources/runs.py (actual implementation) + def submit(self, application_id: str, items: list, custom_metadata: dict = None): """Submit run with automatic SDK metadata attachment.""" @@ -468,11 +486,7 @@ def submit(self, application_id: str, items: list, custom_metadata: dict = None) custom_metadata["sdk"].update(sdk_metadata) # Submit run with merged metadata - return self._api.create_run( - application_id=application_id, - items=items, - custom_metadata=custom_metadata - ) + return self._api.create_run(application_id=application_id, items=items, custom_metadata=custom_metadata) ``` **JSON Schema Generation:** @@ -529,6 +543,7 @@ def validate_run_sdk_metadata(metadata: dict[str, Any]) -> bool: logger.exception("SDK metadata validation failed") raise + def validate_run_sdk_metadata_silent(metadata: dict[str, Any]) -> bool: """Validate Run SDK metadata without raising exceptions.""" try: @@ -537,6 +552,7 @@ def validate_run_sdk_metadata_silent(metadata: dict[str, Any]) -> bool: except ValidationError: return False + def get_run_sdk_metadata_json_schema() -> dict[str, Any]: """Get JSON Schema for Run SDK metadata with $schema and $id fields.""" schema = 
RunSdkMetadata.model_json_schema() @@ -547,6 +563,7 @@ def get_run_sdk_metadata_json_schema() -> dict[str, Any]: ) return schema + def build_item_sdk_metadata(existing_metadata: dict[str, Any] | None = None) -> dict[str, Any]: """Build SDK metadata to attach to individual items - NEW""" now = datetime.now(UTC).isoformat(timespec="seconds") @@ -559,6 +576,7 @@ def build_item_sdk_metadata(existing_metadata: dict[str, Any] | None = None) -> "updated_at": now, } + def validate_item_sdk_metadata(metadata: dict[str, Any]) -> bool: """Validate Item SDK metadata - NEW""" try: @@ -568,6 +586,7 @@ def validate_item_sdk_metadata(metadata: dict[str, Any]) -> bool: logger.exception("Item SDK metadata validation failed") raise + def get_item_sdk_metadata_json_schema() -> dict[str, Any]: """Get JSON Schema for Item SDK metadata - NEW""" schema = ItemSdkMetadata.model_json_schema() @@ -643,9 +662,8 @@ Comprehensive test suite in `tests/aignostics/platform/sdk_metadata_test.py`: # Global cache storage _operation_cache: dict[str, tuple[Any, float]] = {} -def cached_operation( - ttl: int, *, use_token: bool = True, instance_attrs: tuple[str, ...] | None = None -) -> Callable: + +def cached_operation(ttl: int, *, use_token: bool = True, instance_attrs: tuple[str, ...] | None = None) -> Callable: """Decorator for caching function results with TTL. Args: @@ -659,6 +677,7 @@ def cached_operation( - Deletes expired entries automatically - Stores new results with expiry timestamp """ + def decorator(func): def wrapper(*args, **kwargs): # Build cache key @@ -681,9 +700,12 @@ def cached_operation( result = func(*args, **kwargs) _operation_cache[key] = (result, time.time() + ttl) return result + return wrapper + return decorator + def operation_cache_clear(func: Callable | list[Callable] | None = None) -> int: """Clear operation cache, optionally filtering by function(s). 
@@ -707,10 +729,7 @@ def operation_cache_clear(func: Callable | list[Callable] | None = None) -> int: func_list = func if isinstance(func, list) else [func] func_qualified_names = [f.__qualname__ for f in func_list] - keys_to_remove = [ - key for key in _operation_cache - if any(name in key for name in func_qualified_names) - ] + keys_to_remove = [key for key in _operation_cache if any(name in key for name in func_qualified_names)] for key in keys_to_remove: del _operation_cache[key] @@ -740,16 +759,13 @@ auth_jwk_set_cache_ttl: int = 86400 # 1 day # From _client.py @cached_operation(ttl=settings().me_cache_ttl, use_token=True) def me_with_retry() -> Me: - return Retrying(...)( - lambda: self._api.get_me_v1_me_get(...) - ) + return Retrying(...)(lambda: self._api.get_me_v1_me_get(...)) + # From resources/runs.py @cached_operation(ttl=settings().run_cache_ttl, use_token=True) def details_with_retry(run_id: str) -> RunData: - return Retrying(...)( - lambda: self._api.get_run_v1_runs_run_id_get(run_id, ...) 
- ) + return Retrying(...)(lambda: self._api.get_run_v1_runs_run_id_get(run_id, ...)) ``` **Cache Invalidation Strategy:** @@ -806,6 +822,7 @@ run = client.runs.details(run_id, nocache=True) # Force API call applications = client.applications.list(nocache=True) # Bypass cache me = client.me(nocache=True) # Fresh user info + # Useful in tests to avoid race conditions def test_run_update(): run = client.runs.details(run_id, nocache=True) # Always fresh @@ -889,12 +906,12 @@ Comprehensive test suite in `tests/aignostics/platform/client_cache_test.py`: ```python # From _client.py and resources/*.py RETRYABLE_EXCEPTIONS = ( - ServiceException, # 5xx server errors - Urllib3TimeoutError, # Connection timeout - PoolError, # Connection pool exhausted - IncompleteRead, # Partial response received - ProtocolError, # Protocol violation - ProxyError, # Proxy connection failed + ServiceException, # 5xx server errors + Urllib3TimeoutError, # Connection timeout + PoolError, # Connection pool exhausted + IncompleteRead, # Partial response received + ProtocolError, # Protocol violation + ProxyError, # Proxy connection failed ) ``` @@ -909,14 +926,14 @@ def me_with_retry() -> Me: stop=stop_after_attempt(settings().me_retry_attempts), # Max 4 attempts wait=wait_exponential_jitter( initial=settings().me_retry_wait_min, # 0.1s - max=settings().me_retry_wait_max # 60s + max=settings().me_retry_wait_max, # 60s ), before_sleep=before_sleep_log(logger, logging.WARNING), reraise=True, # Re-raise after all attempts exhausted )( lambda: self._api.get_me_v1_me_get( _request_timeout=settings().me_timeout, # 30s - _headers={"User-Agent": user_agent()} + _headers={"User-Agent": user_agent()}, ) ) ``` @@ -926,7 +943,7 @@ def me_with_retry() -> Me: ```python # Defaults (from _settings.py) RETRY_ATTEMPTS_DEFAULT = 4 -RETRY_WAIT_MIN_DEFAULT = 0.1 # seconds +RETRY_WAIT_MIN_DEFAULT = 0.1 # seconds RETRY_WAIT_MAX_DEFAULT = 60.0 # seconds TIMEOUT_DEFAULT = 30.0 # seconds @@ -1039,23 +1056,29 @@ 
Comprehensive test suite in `tests/aignostics/platform/client_me_retry_test.py`: ```python # From codegen/out/aignx/codegen/models/ + class RunState(str, Enum): """Run lifecycle states.""" - PENDING = 'PENDING' # Run created, waiting to start - PROCESSING = 'PROCESSING' # Run actively processing items - TERMINATED = 'TERMINATED' # Run completed (check termination_reason) + + PENDING = "PENDING" # Run created, waiting to start + PROCESSING = "PROCESSING" # Run actively processing items + TERMINATED = "TERMINATED" # Run completed (check termination_reason) + class ItemState(str, Enum): """Item (slide) processing states.""" - PENDING = 'PENDING' # Item queued for processing - PROCESSING = 'PROCESSING' # Item being analyzed - TERMINATED = 'TERMINATED' # Item processing done (check termination_reason) + + PENDING = "PENDING" # Item queued for processing + PROCESSING = "PROCESSING" # Item being analyzed + TERMINATED = "TERMINATED" # Item processing done (check termination_reason) + class ArtifactState(str, Enum): """Individual artifact processing states.""" - PENDING = 'PENDING' # Artifact generation pending - PROCESSING = 'PROCESSING' # Artifact being created - TERMINATED = 'TERMINATED' # Artifact ready or failed + + PENDING = "PENDING" # Artifact generation pending + PROCESSING = "PROCESSING" # Artifact being created + TERMINATED = "TERMINATED" # Artifact ready or failed ``` **New Termination Reason Enums:** @@ -1063,22 +1086,27 @@ class ArtifactState(str, Enum): ```python class RunTerminationReason(str, Enum): """Why a run terminated.""" - ALL_ITEMS_PROCESSED = 'ALL_ITEMS_PROCESSED' # Normal completion - CANCELED_BY_SYSTEM = 'CANCELED_BY_SYSTEM' # System initiated cancellation - CANCELED_BY_USER = 'CANCELED_BY_USER' # User canceled the run + + ALL_ITEMS_PROCESSED = "ALL_ITEMS_PROCESSED" # Normal completion + CANCELED_BY_SYSTEM = "CANCELED_BY_SYSTEM" # System initiated cancellation + CANCELED_BY_USER = "CANCELED_BY_USER" # User canceled the run + class 
ItemTerminationReason(str, Enum): """Why an item terminated.""" - SUCCEEDED = 'SUCCEEDED' # Item processed successfully - USER_ERROR = 'USER_ERROR' # Input validation or user-caused error - SYSTEM_ERROR = 'SYSTEM_ERROR' # Infrastructure or application error - SKIPPED = 'SKIPPED' # Item skipped (e.g., duplicate) + + SUCCEEDED = "SUCCEEDED" # Item processed successfully + USER_ERROR = "USER_ERROR" # Input validation or user-caused error + SYSTEM_ERROR = "SYSTEM_ERROR" # Infrastructure or application error + SKIPPED = "SKIPPED" # Item skipped (e.g., duplicate) + class ArtifactTerminationReason(str, Enum): """Why an artifact terminated.""" - SUCCEEDED = 'SUCCEEDED' # Artifact created successfully - USER_ERROR = 'USER_ERROR' # Input validation error - SYSTEM_ERROR = 'SYSTEM_ERROR' # Generation failed due to system issue + + SUCCEEDED = "SUCCEEDED" # Artifact created successfully + USER_ERROR = "USER_ERROR" # Input validation error + SYSTEM_ERROR = "SYSTEM_ERROR" # Generation failed due to system issue ``` **State Machine Architecture:** @@ -1115,34 +1143,44 @@ PENDING → PROCESSING → TERMINATED ```python class RunOutput(BaseModel): """Run execution results summary.""" + state: RunState termination_reason: RunTerminationReason | None statistics: RunItemStatistics # NEW: Aggregate item counts # ... other fields + class ItemOutput(BaseModel): """Individual item processing results.""" + state: ItemState termination_reason: ItemTerminationReason | None artifacts: list[ArtifactOutput] # List of output artifacts # ... other fields + class ArtifactOutput(BaseModel): """Individual artifact details.""" + state: ArtifactState termination_reason: ArtifactTerminationReason | None - download_url: str | None # Available when SUCCEEDED + output_artifact_id: str # Used to resolve a fresh presigned URL via Run.get_artifact_download_url(...) + download_url: str | None # DEPRECATED — populated for backwards compatibility but may stop being emitted by SAMIA + # at any time. 
Resolve a short-lived presigned URL on demand instead via + # Run.get_artifact_download_url(artifact.output_artifact_id). # ... other fields + class RunItemStatistics(BaseModel): """NEW: Aggregate statistics for run.""" - total: int # Total items in run - succeeded: int # Successfully processed - user_error: int # Failed due to user errors - system_error: int # Failed due to system errors - skipped: int # Skipped items - pending: int # Not yet started - processing: int # Currently processing + + total: int # Total items in run + succeeded: int # Successfully processed + user_error: int # Failed due to user errors + system_error: int # Failed due to system errors + skipped: int # Skipped items + pending: int # Not yet started + processing: int # Currently processing ``` **Model Migrations (Deleted Models):** @@ -1192,11 +1230,15 @@ for item in run.results(): if item.output.state == ItemState.TERMINATED: if item.output.termination_reason == ItemTerminationReason.SUCCEEDED: print(f"Item {item.item_id} succeeded") - # Access artifacts + # Access artifacts — resolve a fresh presigned URL via the /file endpoint. + # The legacy `artifact.download_url` field is deprecated and may stop being + # populated by SAMIA at any time; use Run.get_artifact_download_url(...) so + # the URL is always fresh and the SDK retries the resolve on transient errors. 
for artifact in item.output.artifacts: if artifact.state == ArtifactState.TERMINATED: if artifact.termination_reason == ArtifactTerminationReason.SUCCEEDED: - print(f" - Artifact ready: {artifact.download_url}") + signed_url = run.get_artifact_download_url(artifact.output_artifact_id) + print(f" - Artifact ready: {signed_url}") elif item.output.termination_reason == ItemTerminationReason.USER_ERROR: print(f"Item {item.item_id} failed: user error") elif item.output.termination_reason == ItemTerminationReason.SYSTEM_ERROR: @@ -1260,16 +1302,13 @@ for app in client.applications.list(): # Get application version app_version = client.application_version( application_id="heta", - version_number="1.0.0" # Omit for latest version + version_number="1.0.0", # Omit for latest version ) print(f"Application: {app_version.application_id}") print(f"Version: {app_version.version_number}") # Get latest version -latest = client.application_version( - application_id="heta", - version_number=None -) +latest = client.application_version(application_id="heta", version_number=None) # Get specific run run = client.run("run-id-123") @@ -1287,11 +1326,7 @@ for run in runs: ```python from aignostics.platform import Client -from aignostics.platform._sdk_metadata import ( - build_sdk_metadata, - validate_sdk_metadata, - get_sdk_metadata_json_schema -) +from aignostics.platform._sdk_metadata import build_sdk_metadata, validate_sdk_metadata, get_sdk_metadata_json_schema # SDK metadata is AUTOMATICALLY attached to every run submission client = Client() @@ -1304,7 +1339,7 @@ run = client.runs.submit( "experiment_id": "exp-123", "dataset_version": "v2.1", # SDK metadata will be added under "sdk" key automatically - } + }, ) # Access SDK metadata from run @@ -1354,12 +1389,14 @@ def mock_settings(): mock.return_value = settings yield mock + @pytest.fixture(autouse=True) def mock_can_open_browser(): """Prevent browser opening in tests.""" with 
patch("aignostics.platform._authentication._can_open_browser", return_value=False): yield + @pytest.fixture(autouse=True) def mock_webbrowser(): """Prevent actual browser launch.""" @@ -1375,6 +1412,7 @@ def valid_token_with_expiry() -> str: future_time = int((datetime.now(tz=UTC) + timedelta(hours=1)).timestamp()) return f"valid.jwt.token:{future_time}" + def expired_token() -> str: """Create expired test token.""" past_time = int((datetime.now(tz=UTC) - timedelta(hours=1)).timestamp()) @@ -1388,10 +1426,8 @@ def expired_token() -> str: ```python def test_runs_list_with_pagination(runs, mock_api): # Setup pages - page1 = [Mock(spec=RunReadResponse, run_id=f"run-{i}") - for i in range(PAGE_SIZE)] - page2 = [Mock(spec=RunReadResponse, run_id=f"run-{i + PAGE_SIZE}") - for i in range(5)] + page1 = [Mock(spec=RunReadResponse, run_id=f"run-{i}") for i in range(PAGE_SIZE)] + page2 = [Mock(spec=RunReadResponse, run_id=f"run-{i + PAGE_SIZE}") for i in range(5)] mock_api.list_application_runs_v1_runs_get.side_effect = [page1, page2] @@ -1415,8 +1451,6 @@ def test_runs_list_with_pagination(runs, mock_api): **Logging (Actual Pattern from Code):** ```python - - logger.trace("Initializing client with cache_token={}", cache_token) logger.trace("Client initialized successfully.") logger.exception("Failed to initialize client.") @@ -1488,7 +1522,7 @@ app = app_dict.get("app-id") # For version lookups, use direct API call version = client.application_version( application_id="heta", - version_number="1.0.0" # or None for latest + version_number="1.0.0", # or None for latest ) # Access version attributes print(f"App: {version.application_id}, Version: {version.version_number}") diff --git a/src/aignostics/platform/__init__.py b/src/aignostics/platform/__init__.py index 8036b6b98..1fcaf481c 100644 --- a/src/aignostics/platform/__init__.py +++ b/src/aignostics/platform/__init__.py @@ -13,10 +13,8 @@ from aignx.codegen.exceptions import ApiException, ForbiddenException, 
NotFoundException from aignx.codegen.models import ApplicationReadResponse as Application from aignx.codegen.models import ApplicationReadShortResponse as ApplicationSummary -from aignx.codegen.models import InputArtifact as InputArtifactData -from aignx.codegen.models import InputArtifactCreationRequest as InputArtifact -from aignx.codegen.models import ItemCreationRequest as InputItem from aignx.codegen.models import ( + ArtifactOutput, ItemOutput, ItemState, ItemTerminationReason, @@ -25,6 +23,9 @@ RunState, RunTerminationReason, ) +from aignx.codegen.models import InputArtifact as InputArtifactData +from aignx.codegen.models import InputArtifactCreationRequest as InputArtifact +from aignx.codegen.models import ItemCreationRequest as InputItem from aignx.codegen.models import ItemResultReadResponse as ItemResult from aignx.codegen.models import MeReadResponse as Me from aignx.codegen.models import OrganizationReadResponse as Organization @@ -71,6 +72,10 @@ REDIRECT_URI_PRODUCTION, REDIRECT_URI_STAGING, REDIRECT_URI_TEST, + STATUS_PAGE_URL_DEV, + STATUS_PAGE_URL_PRODUCTION, + STATUS_PAGE_URL_STAGING, + STATUS_PAGE_URL_TEST, TOKEN_URL_DEV, TOKEN_URL_PRODUCTION, TOKEN_URL_STAGING, @@ -91,7 +96,7 @@ get_mime_type_for_artifact, mime_type_to_file_ending, ) -from .resources.runs import LIST_APPLICATION_RUNS_MAX_PAGE_SIZE, LIST_APPLICATION_RUNS_MIN_PAGE_SIZE, Run +from .resources.runs import LIST_APPLICATION_RUNS_MAX_PAGE_SIZE, LIST_APPLICATION_RUNS_MIN_PAGE_SIZE, Artifact, Run __all__ = [ "API_ROOT_DEV", @@ -112,14 +117,10 @@ "CLIENT_ID_INTERACTIVE_STAGING", "CLIENT_ID_INTERACTIVE_TEST", "DEFAULT_CPU_PROVISIONING_MODE", - "DEFAULT_CPU_PROVISIONING_MODE", "DEFAULT_FLEX_START_MAX_RUN_DURATION_MINUTES", "DEFAULT_GPU_PROVISIONING_MODE", - "DEFAULT_GPU_PROVISIONING_MODE", - "DEFAULT_GPU_TYPE", "DEFAULT_GPU_TYPE", "DEFAULT_MAX_GPUS_PER_SLIDE", - "DEFAULT_MAX_GPUS_PER_SLIDE", "DEFAULT_NODE_ACQUISITION_TIMEOUT_MINUTES", "DEVICE_URL_DEV", "DEVICE_URL_PRODUCTION", @@ -132,11 
+133,14 @@ "LIST_APPLICATION_RUNS_MAX_PAGE_SIZE", "LIST_APPLICATION_RUNS_MIN_PAGE_SIZE", "NOT_YET_IMPLEMENTED", - "NOT_YET_IMPLEMENTED", "REDIRECT_URI_DEV", "REDIRECT_URI_PRODUCTION", "REDIRECT_URI_STAGING", "REDIRECT_URI_TEST", + "STATUS_PAGE_URL_DEV", + "STATUS_PAGE_URL_PRODUCTION", + "STATUS_PAGE_URL_STAGING", + "STATUS_PAGE_URL_TEST", "TOKEN_URL_DEV", "TOKEN_URL_PRODUCTION", "TOKEN_URL_STAGING", @@ -146,6 +150,8 @@ "Application", "ApplicationSummary", "ApplicationVersion", + "Artifact", + "ArtifactOutput", "Client", "ForbiddenException", "InputArtifact", @@ -167,7 +173,6 @@ "RunOutput", "RunSdkMetadata", "RunState", - "RunState", "RunTerminationReason", "SchedulingMetadata", "Service", diff --git a/src/aignostics/platform/_client.py b/src/aignostics/platform/_client.py index 02f8dd014..84167ae4b 100644 --- a/src/aignostics/platform/_client.py +++ b/src/aignostics/platform/_client.py @@ -169,12 +169,12 @@ def application(self, application_id: str, nocache: bool = False) -> Application nocache (bool): If True, skip reading from cache and fetch fresh data from the API. The fresh result will still be cached for subsequent calls. Defaults to False. + Returns: + Application: The application object. + Raises: NotFoundException: If the application with the given ID is not found. aignx.codegen.exceptions.ApiException: If the API call fails. - - Returns: - Application: The application object. """ @cached_operation(ttl=settings().application_cache_ttl, use_token=True) @@ -211,13 +211,13 @@ def application_version( nocache (bool): If True, skip reading from cache and fetch fresh data from the API. The fresh result will still be cached for subsequent calls. Defaults to False. + Returns: + ApplicationVersion: The application version object. + Raises: NotFoundException: If the application with the given ID and version number is not found. ValueError: If the version is not valid semver. aignx.codegen.exceptions.ApiException: If the API call fails. 
- - Returns: - ApplicationVersion: The application version object. """ # Handle version resolution and validation first (not retried) if version_number is None: diff --git a/src/aignostics/platform/_constants.py b/src/aignostics/platform/_constants.py index 8a4c0ddba..59d9e573c 100644 --- a/src/aignostics/platform/_constants.py +++ b/src/aignostics/platform/_constants.py @@ -8,6 +8,7 @@ REDIRECT_URI_DEV = "http://localhost:8989/" DEVICE_URL_DEV = "https://dev-8ouohmmrbuh2h4vu.eu.auth0.com/oauth/device/code" JWS_JSON_URL_DEV = "https://dev-8ouohmmrbuh2h4vu.eu.auth0.com/.well-known/jwks.json" +STATUS_PAGE_URL_DEV: str | None = None # No dedicated public Betterstack status page for dev API_ROOT_TEST = "https://platform-test.aignostics.ai" CLIENT_ID_INTERACTIVE_TEST = "gqduveFvx7LX90drQPGzr4JGUYdh24gA" # not a secret, but a public client ID (same as dev) @@ -17,6 +18,7 @@ REDIRECT_URI_TEST = "http://localhost:8989/" DEVICE_URL_TEST = "https://dev-8ouohmmrbuh2h4vu.eu.auth0.com/oauth/device/code" JWS_JSON_URL_TEST = "https://dev-8ouohmmrbuh2h4vu.eu.auth0.com/.well-known/jwks.json" +STATUS_PAGE_URL_TEST: str | None = None # No dedicated public Betterstack status page for test API_ROOT_STAGING = "https://platform-staging.aignostics.com" CLIENT_ID_INTERACTIVE_STAGING = "fQkbvYzQPPVwLxc3uque5JsyFW00rJ7b" # not a secret, but a public client ID @@ -26,6 +28,7 @@ REDIRECT_URI_STAGING = "http://localhost:8989/" DEVICE_URL_STAGING = "https://aignostics-platform-staging.eu.auth0.com/oauth/device/code" JWS_JSON_URL_STAGING = "https://aignostics-platform-staging.eu.auth0.com/.well-known/jwks.json" +STATUS_PAGE_URL_STAGING = "https://status.platform-staging.aignostics.com" API_ROOT_PRODUCTION = "https://platform.aignostics.com" CLIENT_ID_INTERACTIVE_PRODUCTION = "YtJ7F9lAtxx16SZGQlYPe6wcjlXB78MM" # not a secret, but a public client ID @@ -35,6 +38,7 @@ REDIRECT_URI_PRODUCTION = "http://localhost:8989/" DEVICE_URL_PRODUCTION = 
"https://aignostics-platform.eu.auth0.com/oauth/device/code" JWS_JSON_URL_PRODUCTION = "https://aignostics-platform.eu.auth0.com/.well-known/jwks.json" +STATUS_PAGE_URL_PRODUCTION = "https://status.platform.aignostics.com" # Pipeline orchestration defaults DEFAULT_GPU_TYPE = "A100" diff --git a/src/aignostics/platform/_settings.py b/src/aignostics/platform/_settings.py index 5b51c50a6..5edfccb4c 100644 --- a/src/aignostics/platform/_settings.py +++ b/src/aignostics/platform/_settings.py @@ -50,6 +50,10 @@ REDIRECT_URI_PRODUCTION, REDIRECT_URI_STAGING, REDIRECT_URI_TEST, + STATUS_PAGE_URL_DEV, + STATUS_PAGE_URL_PRODUCTION, + STATUS_PAGE_URL_STAGING, + STATUS_PAGE_URL_TEST, TOKEN_URL_DEV, TOKEN_URL_PRODUCTION, TOKEN_URL_STAGING, @@ -106,6 +110,39 @@ def _validate_url(value: str) -> str: return value +def _validate_optional_url(value: str | None) -> str | None: + """Validate an optional URL setting. + + Used for URL settings that may be unset (None) or set to an http(s) URL. Empty strings + (which env-var loaders may produce when the variable is set but empty) are coerced to + None so they round-trip through GUI None-checks correctly. Non-empty values must pass + a stricter check than _validate_url: in addition to scheme/netloc validation, the URL + must not contain characters that could break out of an HTML attribute when interpolated + into raw markup. RFC 3986 requires those characters to be percent-encoded; their raw + form is either malformed or an injection attempt and is rejected here. + + Args: + value: The string to validate, or None. + + Returns: + The validated URL string, or None if input was None or an empty string. + + Raises: + ValueError: If the string is non-empty and either not a valid http(s) URL or + contains characters that are unsafe to interpolate into HTML. 
+ """ + if not value: + return None + forbidden = {'"', "'", "<", ">", "`", "\\"} + if any(c in value for c in forbidden) or any(c.isspace() for c in value): + msg = ( + "URL must not contain quote, angle-bracket, backtick, backslash, or whitespace " + f"characters (RFC 3986 requires percent-encoding): {value!r}" + ) + raise ValueError(msg) + return _validate_url(value) + + class Settings(OpaqueSettings): """Configuration settings for the Aignostics SDK. @@ -206,6 +243,21 @@ def profile_edit_url(self) -> str: ] client_id_interactive: Annotated[str, Field(description="OAuth client ID for interactive flows")] + status_page_url: Annotated[ + str | None, + BeforeValidator(_validate_optional_url), + Field( + description=( + "Public Betterstack status page URL for the platform environment. None when no public status " + "page exists for the resolved api_root (dev/test or unknown environments). Empty strings are " + "coerced to None. The Launchpad GUI hides the status badge and 'Check Platform Status' menu " + "link when this is None. Validation rejects non-http(s) schemes and characters that would " + "break out of an HTML attribute when interpolated into the embedded iframe." 
+ ), + default=None, + ), + ] = None + organization_id: Annotated[ str | None, Field(description="Optional Auth0 organization ID parameter for the /authorize OAuth endpoint") ] = None @@ -539,6 +591,7 @@ def pre_init(cls, values: dict) -> dict: # type: ignore[type-arg] # noqa: N805 values["device_url"] = DEVICE_URL_DEV values["jws_json_url"] = JWS_JSON_URL_DEV values["client_id_interactive"] = CLIENT_ID_INTERACTIVE_DEV + values.setdefault("status_page_url", STATUS_PAGE_URL_DEV) case x if x == API_ROOT_TEST: values["audience"] = AUDIENCE_TEST values["authorization_base_url"] = AUTHORIZATION_BASE_URL_TEST @@ -547,6 +600,7 @@ def pre_init(cls, values: dict) -> dict: # type: ignore[type-arg] # noqa: N805 values["device_url"] = DEVICE_URL_TEST values["jws_json_url"] = JWS_JSON_URL_TEST values["client_id_interactive"] = CLIENT_ID_INTERACTIVE_TEST + values.setdefault("status_page_url", STATUS_PAGE_URL_TEST) case x if x == API_ROOT_STAGING: values["audience"] = AUDIENCE_STAGING values["authorization_base_url"] = AUTHORIZATION_BASE_URL_STAGING @@ -555,6 +609,7 @@ def pre_init(cls, values: dict) -> dict: # type: ignore[type-arg] # noqa: N805 values["device_url"] = DEVICE_URL_STAGING values["jws_json_url"] = JWS_JSON_URL_STAGING values["client_id_interactive"] = CLIENT_ID_INTERACTIVE_STAGING + values.setdefault("status_page_url", STATUS_PAGE_URL_STAGING) case x if x == API_ROOT_PRODUCTION: values["audience"] = AUDIENCE_PRODUCTION values["authorization_base_url"] = AUTHORIZATION_BASE_URL_PRODUCTION @@ -563,6 +618,7 @@ def pre_init(cls, values: dict) -> dict: # type: ignore[type-arg] # noqa: N805 values["device_url"] = DEVICE_URL_PRODUCTION values["jws_json_url"] = JWS_JSON_URL_PRODUCTION values["client_id_interactive"] = CLIENT_ID_INTERACTIVE_PRODUCTION + values.setdefault("status_page_url", STATUS_PAGE_URL_PRODUCTION) case _: raise ValueError(UNKNOWN_ENDPOINT_URL) diff --git a/src/aignostics/platform/resources/runs.py b/src/aignostics/platform/resources/runs.py index 
f2b7a49b9..ecb1a3fac 100644 --- a/src/aignostics/platform/resources/runs.py +++ b/src/aignostics/platform/resources/runs.py @@ -7,14 +7,17 @@ import builtins import time import typing as t -from collections.abc import Iterator +from collections.abc import Callable, Iterator +from http import HTTPStatus from pathlib import Path from time import sleep from typing import Any, cast +import requests from aignx.codegen.api.public_api import PublicApi -from aignx.codegen.exceptions import NotFoundException, ServiceException +from aignx.codegen.exceptions import ApiException, NotFoundException, ServiceException from aignx.codegen.models import ( + ArtifactOutput, CustomMetadataUpdateRequest, ItemCreationRequest, ItemOutput, @@ -49,6 +52,7 @@ from urllib3.exceptions import IncompleteRead, PoolError, ProtocolError, ProxyError from urllib3.exceptions import TimeoutError as Urllib3TimeoutError +from aignostics.platform._authentication import get_token from aignostics.platform._operation_cache import cached_operation, operation_cache_clear from aignostics.platform._sdk_metadata import ( build_item_sdk_metadata, @@ -105,6 +109,159 @@ class DownloadTimeoutError(RuntimeError): """Exception raised when the download operation exceeds its timeout.""" +_REDIRECT_STATUSES = frozenset({ + HTTPStatus.MOVED_PERMANENTLY, + HTTPStatus.FOUND, + HTTPStatus.SEE_OTHER, + HTTPStatus.TEMPORARY_REDIRECT, + HTTPStatus.PERMANENT_REDIRECT, +}) + + +class Artifact: + """Represents a single output artifact belonging to a run. + + Provides operations to resolve a fresh presigned download URL via the + ``GET /api/v1/runs/{run_id}/artifacts/{artifact_id}/file`` endpoint. + """ + + def __init__(self, api: PublicApi, run_id: str, artifact_id: str) -> None: + """Initializes an Artifact instance. + + Args: + api (PublicApi): The configured API client. + run_id (str): The ID of the parent run. + artifact_id (str): The ID of the output artifact. 
+ """ + self._api = api + self.run_id = run_id + self.artifact_id = artifact_id + + def get_download_url(self) -> str: + """Resolve a fresh presigned download URL for this artifact. + + Calls ``GET /api/v1/runs/{run_id}/artifacts/{artifact_id}/file`` with + ``allow_redirects=False`` and returns the presigned URL from the redirect + ``Location`` header. The presigned URL is short-lived; resolve immediately + before downloading. + + The generated client cannot be used directly because urllib3 follows the + redirect automatically and would fetch the artifact body — losing the URL + we need for streaming/chunked downloads with checksum verification. + + Returns: + str: A time-limited presigned URL. + + Raises: + NotFoundException: 404 — artifact not found for the run. + ApiException: Other 4xx (e.g. 403 forbidden, 410 gone). + ServiceException: 5xx, request timeouts, or connection errors + (after retry attempts have been exhausted). + RuntimeError: 3xx response with no Location header, or any other + unexpected status the API contract does not define. + """ + configuration = self._api.api_client.configuration + host = configuration.host.rstrip("/") + endpoint_url = f"{host}/api/v1/runs/{self.run_id}/artifacts/{self.artifact_id}/file" + proxy = getattr(configuration, "proxy", None) + ssl_ca_cert = getattr(configuration, "ssl_ca_cert", None) + verify_ssl = getattr(configuration, "verify_ssl", True) + ssl_verify: bool | str = ssl_ca_cert or verify_ssl + # Honor the codegen client's token_provider when set: Client.get_api_client() + # wires it up with use_cache=cache_token, so a user who instantiates + # Client(cache_token=False) does not want us to read/write the token cache. + # Fall back to get_token() only when the configuration was built outside + # of Client (e.g. unit tests with bare PublicApi). 
+ token_provider = getattr(configuration, "token_provider", None) or get_token + + return Retrying( + retry=retry_if_exception_type(exception_types=RETRYABLE_EXCEPTIONS), + stop=stop_after_attempt(settings().run_retry_attempts), + wait=wait_exponential_jitter(initial=settings().run_retry_wait_min, max=settings().run_retry_wait_max), + before_sleep=_log_retry_attempt, + reraise=True, + )(lambda: self._fetch_redirect_url(endpoint_url, ssl_verify, proxy, token_provider)) + + def _fetch_redirect_url( + self, + endpoint_url: str, + ssl_verify: bool | str, + proxy: str | None, + token_provider: Callable[[], str], + ) -> str: + """Issue the GET and return the presigned URL from the 3xx Location header. + + Args: + endpoint_url: Full /file endpoint URL. + ssl_verify: True/False or CA bundle path, mirroring the codegen client config. + proxy: Optional HTTP/HTTPS proxy URL, mirroring the codegen client config. + token_provider: Callable returning a fresh bearer token. Honors the + codegen client's ``cache_token`` choice when sourced from + ``Configuration.token_provider``. + + Returns: + str: The presigned URL extracted from the Location header. + + Raises: + NotFoundException: 404. + ApiException: 4xx other than 404. + ServiceException: 5xx or transient network errors (caught & wrapped so + the outer Retrying picks them up). + RuntimeError: 3xx without a Location header, or unexpected non-3xx status. 
+ """ + try: + with requests.get( + endpoint_url, + headers={ + "Authorization": f"Bearer {token_provider()}", + "User-Agent": user_agent(), + }, + allow_redirects=False, + timeout=settings().run_timeout, + proxies={"http": proxy, "https": proxy} if proxy else None, + verify=ssl_verify, + stream=True, + ) as response: + if response.status_code in _REDIRECT_STATUSES: + location = response.headers.get("Location") + if not location: + msg = ( + f"Redirect response {response.status_code} from /file endpoint " + f"missing Location header for artifact {self.artifact_id}" + ) + raise RuntimeError(msg) + return location + if response.status_code == HTTPStatus.NOT_FOUND: + raise NotFoundException( + status=HTTPStatus.NOT_FOUND.value, + reason=f"Artifact {self.artifact_id} not found in run {self.run_id}", + ) + if response.status_code >= HTTPStatus.INTERNAL_SERVER_ERROR: + raise ServiceException(status=response.status_code, reason=response.reason) + if response.status_code >= HTTPStatus.BAD_REQUEST: + raise ApiException(status=response.status_code, reason=response.reason) + msg = ( + f"Unexpected status {response.status_code} from /file endpoint " + f"for artifact {self.artifact_id}; expected a redirect" + ) + raise RuntimeError(msg) + except requests.Timeout as e: + raise ServiceException( + status=HTTPStatus.SERVICE_UNAVAILABLE.value, + reason="Request timed out", + ) from e + except requests.ConnectionError as e: + raise ServiceException( + status=HTTPStatus.SERVICE_UNAVAILABLE.value, + reason="Connection failed", + ) from e + except requests.RequestException as e: + raise ServiceException( + status=HTTPStatus.SERVICE_UNAVAILABLE.value, + reason=f"Request failed: {e}", + ) from e + + class Run: """Represents a single application run. 
@@ -316,7 +473,9 @@ def download_to_folder( # noqa: C901 raise DownloadTimeoutError(msg) # noqa: TRY301 for item in self.results(nocache=True): if item.state == ItemState.TERMINATED and item.output == ItemOutput.FULL: - self.ensure_artifacts_downloaded(application_run_dir, item, checksum_attribute_key) + self.ensure_artifacts_downloaded( + application_run_dir, item, checksum_attribute_key, print_status=print_status + ) sleep(sleep_interval) application_run_state = self.details(nocache=True).state print(self) if print_status else None @@ -327,8 +486,10 @@ def download_to_folder( # noqa: C901 # check if last results have been downloaded yet and report on errors for item in self.results(nocache=True): - if ItemOutput.FULL: - self.ensure_artifacts_downloaded(application_run_dir, item, checksum_attribute_key) + if item.state == ItemState.TERMINATED and item.output == ItemOutput.FULL: + self.ensure_artifacts_downloaded( + application_run_dir, item, checksum_attribute_key, print_status=print_status + ) message = ( f"Output of item `{item.external_id}` is `{item.output}`, state `{item.state}`, " f"error `{item.error_message}` ({item.error_code}), " @@ -345,8 +506,40 @@ def download_to_folder( # noqa: C901 msg = f"Download operation failed unexpectedly for run {self.run_id}: {e}" raise RuntimeError(msg) from e - @staticmethod + def artifact(self, artifact_id: str) -> Artifact: + """Get an Artifact handle for resolving a presigned download URL. + + Args: + artifact_id (str): The output artifact ID + (``OutputArtifactResultReadResponse.output_artifact_id``). + + Returns: + Artifact: A handle bound to this run and the given artifact. + """ + return Artifact(self._api, self.run_id, artifact_id) + + def get_artifact_download_url(self, artifact_id: str) -> str: + """Resolve a fresh presigned download URL for an artifact of this run. + + Convenience wrapper around :meth:`artifact` and + :meth:`Artifact.get_download_url`. + + Args: + artifact_id (str): The output artifact ID. 
+ + Returns: + str: A short-lived presigned URL. + + Raises: + NotFoundException: 404. + ApiException: Other 4xx. + ServiceException: 5xx or transient network errors. + RuntimeError: Unexpected response from the /file endpoint. + """ + return self.artifact(artifact_id).get_download_url() + def ensure_artifacts_downloaded( + self, base_folder: Path, item: ItemResultReadResponse, checksum_attribute_key: str = "checksum_base64_crc32c", @@ -355,6 +548,9 @@ def ensure_artifacts_downloaded( """Ensures all artifacts for an item are downloaded. Downloads missing or partially downloaded artifacts and verifies their integrity. + Resolves a fresh presigned URL for each artifact via the + ``/api/v1/runs/{run_id}/artifacts/{artifact_id}/file`` endpoint instead + of the deprecated ``OutputArtifactResultReadResponse.download_url`` field. Args: base_folder (Path): Base directory to download artifacts to. @@ -370,34 +566,36 @@ def ensure_artifacts_downloaded( downloaded_at_least_one_artifact = False for artifact in item.output_artifacts: - if artifact.download_url: - item_dir.mkdir(exist_ok=True, parents=True) - file_ending = mime_type_to_file_ending(get_mime_type_for_artifact(artifact)) - file_path = item_dir / f"{artifact.name}{file_ending}" - if not artifact.metadata: - logger.error( - "Skipping artifact %s for item %s, no metadata present", artifact.name, item.external_id - ) - print( - f"> Skipping artifact {artifact.name} for item {item.external_id}, no metadata present" - ) if print_status else None - continue - checksum = artifact.metadata[checksum_attribute_key] - - if file_path.exists(): - file_checksum = calculate_file_crc32c(file_path) - if file_checksum != checksum: - logger.trace("Resume download for {} to {}", artifact.name, file_path) - print(f"> Resume download for {artifact.name} to {file_path}") if print_status else None - else: - continue + if artifact.output != ArtifactOutput.AVAILABLE: + continue + # Metadata check must come BEFORE the MIME lookup: an artifact 
with + # empty metadata falls back to application/octet-stream, which + # mime_type_to_file_ending raises ValueError on — pre-empting the + # intended skip. (Caught by Copilot review on PR #598.) + if not artifact.metadata: + logger.error("Skipping artifact %s for item %s, no metadata present", artifact.name, item.external_id) + print( + f"> Skipping artifact {artifact.name} for item {item.external_id}, no metadata present" + ) if print_status else None + continue + item_dir.mkdir(exist_ok=True, parents=True) + file_ending = mime_type_to_file_ending(get_mime_type_for_artifact(artifact)) + file_path = item_dir / f"{artifact.name}{file_ending}" + checksum = artifact.metadata[checksum_attribute_key] + + if file_path.exists(): + file_checksum = calculate_file_crc32c(file_path) + if file_checksum != checksum: + logger.trace("Resume download for {} to {}", artifact.name, file_path) + print(f"> Resume download for {artifact.name} to {file_path}") if print_status else None else: - downloaded_at_least_one_artifact = True - logger.trace("Download for {} to {}", artifact.name, file_path) - print(f"> Download for {artifact.name} to {file_path}") if print_status else None + continue + else: + logger.trace("Download for {} to {}", artifact.name, file_path) + print(f"> Download for {artifact.name} to {file_path}") if print_status else None - # if file is not there at all or only partially downloaded yet - download_file(artifact.download_url, str(file_path), checksum) + downloaded_at_least_one_artifact = True + download_file(self.get_artifact_download_url(artifact.output_artifact_id), str(file_path), checksum) if downloaded_at_least_one_artifact: logger.trace("Downloaded results for item: {} to {}", item.external_id, item_dir) diff --git a/src/aignostics/platform/resources/utils.py b/src/aignostics/platform/resources/utils.py index 052a17a27..aaaf23649 100644 --- a/src/aignostics/platform/resources/utils.py +++ b/src/aignostics/platform/resources/utils.py @@ -33,13 +33,21 @@ def 
paginate(func: Callable[..., list[T]], *args: object, page_size: int = PAGE_ Yields: Individual items from all pages. + Raises: + ValueError: If page_size is 0 or negative. + Example: >>> def list_items(page=1, page_size=20): - ... # API call that returns a list of items for the given page - ... return [f"item_{i}" for i in range(page_size)] + ... offset = (page - 1) * page_size + ... count = page_size if page == 1 else 5 # partial last page + ... return [f"item_{offset + i}" for i in range(count)] >>> items = list(paginate(list_items)) >>> print(len(items)) + 25 """ + if page_size <= 0: + message = f"page_size must be a positive integer, got {page_size}" + raise ValueError(message) page = 1 while True: try: diff --git a/src/aignostics/qupath/CLAUDE.md b/src/aignostics/qupath/CLAUDE.md index 52448176d..e10480ecc 100644 --- a/src/aignostics/qupath/CLAUDE.md +++ b/src/aignostics/qupath/CLAUDE.md @@ -70,11 +70,7 @@ class Service(BaseService): _processes: dict[int, subprocess.Popen] = {} - def launch_qupath( - self, - project: Path | None = None, - headless: bool = False - ) -> int: + def launch_qupath(self, project: Path | None = None, headless: bool = False) -> int: """Launch QuPath with process tracking.""" cmd = [str(self.get_qupath_executable())] @@ -83,10 +79,7 @@ class Service(BaseService): if headless: cmd.append("--headless") - process = subprocess.Popen( - cmd, - creationflags=SUBPROCESS_CREATION_FLAGS - ) + process = subprocess.Popen(cmd, creationflags=SUBPROCESS_CREATION_FLAGS) self._processes[process.pid] = process return process.pid @@ -109,6 +102,7 @@ class AddProgress(BaseModel): return 0.0 return self.processed_images / self.total_images + class AnnotateProgress(BaseModel): """Progress tracking for annotations.""" @@ -127,6 +121,7 @@ class AnnotateProgress(BaseModel): ```python QUPATH_VERSION = "0.5.1" + def get_download_url(version: str, system: str, machine: str) -> str: """Get platform-specific QuPath download URL.""" @@ -135,7 +130,7 @@ def 
get_download_url(version: str, system: str, machine: str) -> str: ("Windows", "AMD64"): "win-x64", ("Darwin", "x86_64"): "mac-x64", ("Darwin", "arm64"): "mac-arm64", - ("Linux", "x86_64"): "linux-x64" + ("Linux", "x86_64"): "linux-x64", } platform_str = platform_map.get((system, machine)) @@ -155,17 +150,12 @@ class InstallProgressState(StrEnum): COMPLETED = "Installation complete" FAILED = "Installation failed" -def install_with_progress( - version: str, - path: Path, - progress_callback: Callable[[InstallProgress], None] -) -> None: + +def install_with_progress(version: str, path: Path, progress_callback: Callable[[InstallProgress], None]) -> None: """Install QuPath with progress updates.""" progress = InstallProgress( - state=InstallProgressState.DOWNLOADING, - total_size=get_download_size(version), - downloaded_size=0 + state=InstallProgressState.DOWNLOADING, total_size=get_download_size(version), downloaded_size=0 ) # Download with progress @@ -192,7 +182,7 @@ def create_project(project_path: Path, images: list[Path]) -> None: "createTimestamp": time.time() * 1000, "modifyTimestamp": time.time() * 1000, "uri": project_path.as_uri(), - "images": [] + "images": [], }) ) @@ -204,13 +194,7 @@ def create_project(project_path: Path, images: list[Path]) -> None: image_data_dir.mkdir(parents=True, exist_ok=True) # Create image data.qpdata file - (image_data_dir / "data.qpdata").write_text( - json.dumps({ - "path": str(image), - "id": image_id, - "metadata": {} - }) - ) + (image_data_dir / "data.qpdata").write_text(json.dumps({"path": str(image), "id": image_id, "metadata": {}})) ``` ### Script Execution @@ -219,18 +203,11 @@ def create_project(project_path: Path, images: list[Path]) -> None: ```python def run_script( - script_path: Path, - project: Path | None = None, - image: Path | None = None, - args: dict[str, Any] | None = None + script_path: Path, project: Path | None = None, image: Path | None = None, args: dict[str, Any] | None = None ) -> str: """Execute 
QuPath Groovy script.""" - cmd = [ - str(self.get_qupath_executable()), - "script", - str(script_path) - ] + cmd = [str(self.get_qupath_executable()), "script", str(script_path)] if project: cmd.extend(["--project", str(project)]) @@ -239,12 +216,7 @@ def run_script( if args: cmd.extend(["--args", json.dumps(args)]) - result = subprocess.run( - cmd, - capture_output=True, - text=True, - check=True - ) + result = subprocess.run(cmd, capture_output=True, text=True, check=True) return result.stdout ``` @@ -260,10 +232,7 @@ service = Service() # Install QuPath if not already installed if not service.is_qupath_installed(): - service.install_qupath( - version="0.5.1", - progress_callback=lambda p: print(f"Progress: {p.progress_normalized:.1%}") - ) + service.install_qupath(version="0.5.1", progress_callback=lambda p: print(f"Progress: {p.progress_normalized:.1%}")) # Launch QuPath pid = service.launch_qupath() @@ -289,15 +258,13 @@ images = [Path("slide1.svs"), Path("slide2.svs")] service.create_project(project_path, images) + # Add more images with progress def on_progress(progress: AddProgress): print(f"Adding images: {progress.processed_images}/{progress.total_images}") -service.add_images_to_project( - project_path, - additional_images, - progress_callback=on_progress -) + +service.add_images_to_project(project_path, additional_images, progress_callback=on_progress) ``` ### Script Automation @@ -305,11 +272,7 @@ service.add_images_to_project( ```python # Run analysis script script = Path("cell_detection.groovy") -results = service.run_script( - script_path=script, - project=project_path, - args={"threshold": 0.5, "min_area": 10} -) +results = service.run_script(script_path=script, project=project_path, args={"threshold": 0.5, "min_area": 10}) print(f"Script output: {results}") ``` @@ -397,16 +360,9 @@ aignostics qupath uninstall **Logging Patterns:** ```python -logger.debug("Installing QuPath", extra={ - "version": version, - "path": str(path), - "platform": 
f"{system}-{machine}" -}) - -logger.warning("QuPath process terminated unexpectedly", extra={ - "pid": pid, - "exit_code": process.returncode -}) +logger.debug("Installing QuPath", extra={"version": version, "path": str(path), "platform": f"{system}-{machine}"}) + +logger.warning("QuPath process terminated unexpectedly", extra={"pid": pid, "exit_code": process.returncode}) ``` ## Common Pitfalls & Solutions @@ -421,11 +377,7 @@ logger.warning("QuPath process terminated unexpectedly", extra={ def check_java_version() -> bool: """Verify Java 17+ is available.""" try: - result = subprocess.run( - ["java", "-version"], - capture_output=True, - text=True - ) + result = subprocess.run(["java", "-version"], capture_output=True, text=True) # Parse version from stderr return "17" in result.stderr or "18" in result.stderr except FileNotFoundError: @@ -441,8 +393,7 @@ def check_java_version() -> bool: ```python if platform.system() == "Linux" and platform.machine() in ["aarch64", "arm64"]: raise UnsupportedPlatformError( - "QuPath is not available for ARM64 Linux. " - "Consider using x86_64 emulation or container." + "QuPath is not available for ARM64 Linux. Consider using x86_64 emulation or container." 
) ``` @@ -473,6 +424,7 @@ def mock_qupath_executable(): mock.return_value = Path("/mock/QuPath") yield mock + def test_launch_qupath(mock_qupath_executable): """Test QuPath launch.""" service = Service() @@ -530,9 +482,10 @@ SCRIPT_TEMPLATES = { def segmentTissue(server, params) { // Tissue segmentation logic } - """ + """, } + def get_script_template(name: str) -> str: """Get predefined script template.""" return SCRIPT_TEMPLATES.get(name, "") diff --git a/src/aignostics/qupath/_service.py b/src/aignostics/qupath/_service.py index a656a91ea..d651ad970 100644 --- a/src/aignostics/qupath/_service.py +++ b/src/aignostics/qupath/_service.py @@ -446,13 +446,13 @@ def _download_qupath( # noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917 download_progress (Callable | None): Callback function for download progress. install_progress_queue (Any | None): Queue for download progress updates, if applicable. + Returns: + Path: The path object of the downloaded file. + Raises: ValueError: If the platform.system() is not supported. RuntimeError: If the download fails or if the file cannot be saved. Exception: If there is an error during the download. - - Returns: - Path: The path object of the downloaded file. """ system = platform.system() if platform_system is None else platform_system machine = platform.machine() if platform_machine is None else platform_machine @@ -593,12 +593,12 @@ def _extract_qupath( # noqa: C901, PLR0912, PLR0915 platform_system (str | None): The system platform. If None, it will use platform.system(). platform_machine (str | None): The machine architecture. If None, it will use platform.machine(). + Returns: + Path: The path to the extracted QuPath application directory. + Raises: ValueError: If there is broken input. RuntimeError: If an unexpected error happens. - - Returns: - Path: The path to the extracted QuPath application directory. 
""" system = platform.system() if platform_system is None else platform_system logger.trace("Extracting QuPath archive '{}' to '{}' for system {}", archive_path, installation_path, system) @@ -702,8 +702,10 @@ def _extract_qupath( # noqa: C901, PLR0912, PLR0915 [ "sh", "-c", - f"cd '{payload_extract_dir.resolve()!s}' && " - f"cat '{payload_path.resolve()!s}' | gunzip -dc | cpio -i", + ( + f"cd '{payload_extract_dir.resolve()!s}' && " + f"cat '{payload_path.resolve()!s}' | gunzip -dc | cpio -i" + ), ] if platform.system() == "Darwin" else ["7z", "x", str(payload_path.resolve()), f"-o{payload_extract_dir.resolve()!s}"] @@ -780,12 +782,12 @@ def install_qupath( # noqa: PLR0913, PLR0917 extract_progress (Callable | None): Callback function for extraction progress. progress_queue (queue.Queue[InstallProgress] | None): Queue for download progress updates, if applicable. + Returns: + Path: The path to the executable of the installed QuPath application. + Raises: RuntimeError: If the download fails or if the file cannot be extracted. Exception: If there is an error during the download or extraction. - - Returns: - Path: The path to the executable of the installed QuPath application. """ if path is None: path = Service.get_installation_path() diff --git a/src/aignostics/system/CLAUDE.md b/src/aignostics/system/CLAUDE.md index 3b3d01248..20cc5b1fa 100644 --- a/src/aignostics/system/CLAUDE.md +++ b/src/aignostics/system/CLAUDE.md @@ -79,6 +79,7 @@ def _abort_if_system_unhealthy() -> None: console.print(f"[error]Error:[/error] Platform is not healthy: {health.reason}. 
Aborting.") sys.exit(1) + # Called before upload and submit operations unless --force is used if not force: _abort_if_system_unhealthy() @@ -112,15 +113,12 @@ def health(self) -> Health: # Check health of EVERY discovered module for service_class in all_services: - module_name = service_class.__module__.split('.')[-2] # Extract module name + module_name = service_class.__module__.split(".")[-2] # Extract module name try: service_instance = service_class() components[module_name] = service_instance.health() except Exception as e: - components[module_name] = Health( - status=Health.Code.DOWN, - reason=str(e) - ) + components[module_name] = Health(status=Health.Code.DOWN, reason=str(e)) # Determine overall status based on ALL modules # Priority: DOWN > DEGRADED > UP @@ -133,18 +131,25 @@ def health(self) -> Health: ```python class AignosticsException(Exception): """Base exception for all SDK errors.""" + pass + class AuthenticationError(AignosticsException): """Authentication/authorization failures.""" + pass + class ConfigurationError(AignosticsException): """Configuration/settings errors.""" + pass + class NetworkError(AignosticsException): """Network/connectivity issues.""" + pass ``` @@ -166,20 +171,20 @@ def info(self, include_environ: bool = False, mask_secrets: bool = True) -> dict "machine": platform.machine(), "processor": platform.processor(), "python_version": sys.version, - "python_implementation": platform.python_implementation() + "python_implementation": platform.python_implementation(), }, "aignostics": { "version": __version__, "api_versions": API_VERSIONS, "modules": self._get_installed_modules(), - "extras": self._get_installed_extras() + "extras": self._get_installed_extras(), }, "runtime": { "cwd": os.getcwd(), "user": getpass.getuser(), "home": str(Path.home()), - "data_dir": str(get_user_data_directory()) - } + "data_dir": str(get_user_data_directory()), + }, } if include_environ: @@ -191,13 +196,8 @@ def info(self, include_environ: bool = 
False, mask_secrets: bool = True) -> dict **Secret Masking Pattern:** ```python -SENSITIVE_PATTERNS = [ - r".*TOKEN.*", - r".*SECRET.*", - r".*PASSWORD.*", - r".*API_KEY.*", - r".*PRIVATE.*" -] +SENSITIVE_PATTERNS = [r".*TOKEN.*", r".*SECRET.*", r".*PASSWORD.*", r".*API_KEY.*", r".*PRIVATE.*"] + def _mask_value(key: str, value: str, mask_secrets: bool) -> str: """Mask sensitive values based on key patterns.""" @@ -222,13 +222,13 @@ def proxy_request( url: str, proxy_host: str = HTTP_PROXY_DEFAULT_HOST, proxy_port: int = HTTP_PROXY_DEFAULT_PORT, - proxy_scheme: str = HTTP_PROXY_DEFAULT_SCHEME + proxy_scheme: str = HTTP_PROXY_DEFAULT_SCHEME, ) -> None: """Test HTTP request through proxy.""" proxies = { "http": f"{proxy_scheme}://{proxy_host}:{proxy_port}", - "https": f"{proxy_scheme}://{proxy_host}:{proxy_port}" + "https": f"{proxy_scheme}://{proxy_host}:{proxy_port}", } try: @@ -245,9 +245,11 @@ def proxy_request( ```python class OutputFormat(StrEnum): """Supported output formats.""" + YAML = "yaml" JSON = "json" + def format_output(data: Any, format: OutputFormat) -> str: """Format data for output.""" @@ -382,7 +384,7 @@ def detect_environment() -> dict: "is_gitlab_ci": os.getenv("GITLAB_CI") == "true", "is_jenkins": os.getenv("JENKINS_URL") is not None, "is_notebook": any(key.startswith("JUPYTER") for key in os.environ), - "is_vscode": os.getenv("VSCODE_PID") is not None + "is_vscode": os.getenv("VSCODE_PID") is not None, } ``` @@ -420,10 +422,7 @@ def health_with_timeout(module_name: str, timeout: float = 5.0) -> Health: try: return future.result(timeout=timeout) except concurrent.futures.TimeoutError: - return Health( - status=Health.Code.DOWN, - reason=f"Health check timed out after {timeout}s" - ) + return Health(status=Health.Code.DOWN, reason=f"Health check timed out after {timeout}s") ``` ### Circular Import Issues @@ -437,6 +436,7 @@ def health_with_timeout(module_name: str, timeout: float = 5.0) -> Health: def get_module_service(module_name: str): """Lazy 
import to avoid circular dependencies.""" from importlib import import_module + module = import_module(f"aignostics.{module_name}") return module.Service() ``` @@ -455,6 +455,7 @@ def test_health_aggregation(): assert "platform" in health.components assert isinstance(health.components, dict) + def test_secret_masking(): """Test sensitive values are masked.""" service = Service() @@ -502,7 +503,7 @@ def add_custom_info(info: dict) -> dict: "gpu_available": torch.cuda.is_available() if find_spec("torch") else False, "memory_gb": psutil.virtual_memory().total / (1024**3), "cpu_count": os.cpu_count(), - "disk_usage": shutil.disk_usage("/").used / shutil.disk_usage("/").total + "disk_usage": shutil.disk_usage("/").used / shutil.disk_usage("/").total, } return info diff --git a/src/aignostics/system/_gui.py b/src/aignostics/system/_gui.py index 31a33c341..54aad49ca 100644 --- a/src/aignostics/system/_gui.py +++ b/src/aignostics/system/_gui.py @@ -63,7 +63,9 @@ async def page_system() -> None: # noqa: PLR0915 # Mask secrets switch with reload functionality with ui.row().classes("w-full items-center gap-2 mb-4"): mask_secrets_switch = ui.switch( - text="Mask secrets", value=True, on_change=lambda e: load_info(mask_secrets=e.value) + text="Mask secrets", + value=True, + on_change=lambda e: load_info(mask_secrets=bool(e.value)), ) spinner = ui.spinner(size="lg").classes( diff --git a/src/aignostics/utils/CLAUDE.md b/src/aignostics/utils/CLAUDE.md index e5cc2fa6e..782d0c062 100644 --- a/src/aignostics/utils/CLAUDE.md +++ b/src/aignostics/utils/CLAUDE.md @@ -57,6 +57,7 @@ services = locate_implementations(BaseService) # Find all subclasses of a type subclasses = locate_subclasses(BaseService) + # Services inherit from BaseService class MyService(BaseService): async def health(self) -> Health: @@ -76,6 +77,7 @@ from typing import Annotated from fastapi import Depends from aignostics.my_module._service import Service + @router.get("/endpoint") async def endpoint(service: 
Annotated[Service, Depends(Service.get_service())]): return service.do_something() @@ -127,9 +129,11 @@ logger.debug("Application started", extra={"correlation_id": "123"}) from aignostics.utils import load_settings from pydantic import BaseModel + class MySettings(BaseModel): api_url: str = "https://api.example.com" + settings = load_settings(MySettings) ``` @@ -138,12 +142,10 @@ settings = load_settings(MySettings) ```python from aignostics.utils import Health, BaseService + class MyService(BaseService): async def health(self) -> Health: - return Health( - status=Health.Code.UP, - details={"database": "connected"} - ) + return Health(status=Health.Code.UP, details={"database": "connected"}) ``` **MCP Server Utilities:** @@ -160,7 +162,7 @@ from aignostics.utils import ( # Constants print(MCP_SERVER_NAME) # "Central Aignostics MCP Server" -print(MCP_TRANSPORT) # "stdio" +print(MCP_TRANSPORT) # "stdio" # Create and configure MCP server server = mcp_create_server() @@ -363,6 +365,7 @@ Plugins can expose MCP tools by: mcp = FastMCP("my_plugin") + @mcp.tool def my_tool(param: str) -> str: """Tool description.""" @@ -372,6 +375,7 @@ Plugins can expose MCP tools by: 3. 
Exporting the instance in `__init__.py`: ```python from ._mcp import mcp + __all__ = ["mcp"] ``` diff --git a/src/aignostics/utils/_constants.py b/src/aignostics/utils/_constants.py index 0ec4b1a2b..43726a296 100644 --- a/src/aignostics/utils/_constants.py +++ b/src/aignostics/utils/_constants.py @@ -43,7 +43,9 @@ f"{__project_name__.upper()}_ENVIRONMENT": lambda env: env, "ENV": lambda env: env, "VERCEL_ENV": lambda env: env, # See https://vercel.com/docs/environment-variables/system-environment-variables - "RAILWAY_ENVIRONMENT": lambda env: env, # See https://docs.railway.com/reference/variables#railway-provided-variables + "RAILWAY_ENVIRONMENT": lambda env: ( + env + ), # See https://docs.railway.com/reference/variables#railway-provided-variables } __env__ = "local" # Default for env_var, mapper in ENV_VAR_MAPPINGS.items(): @@ -65,8 +67,12 @@ # Determine __base_url__ PLATFORM_URL_MAPPINGS = { - "VERCEL_URL": lambda url: f"https://{url}", # See https://vercel.com/docs/environment-variables/system-environment-variables - "RAILWAY_PUBLIC_DOMAIN": lambda url: f"https://{url}", # See https://docs.railway.com/reference/variables#railway-provided-variables + "VERCEL_URL": lambda url: ( + f"https://{url}" + ), # See https://vercel.com/docs/environment-variables/system-environment-variables + "RAILWAY_PUBLIC_DOMAIN": lambda url: ( + f"https://{url}" + ), # See https://docs.railway.com/reference/variables#railway-provided-variables } __base__url__ = os.getenv(f"{__project_name__.upper()}_BASE_URL") if not __base__url__: diff --git a/src/aignostics/utils/_gui.py b/src/aignostics/utils/_gui.py index ff9019b45..d6eb18f7f 100644 --- a/src/aignostics/utils/_gui.py +++ b/src/aignostics/utils/_gui.py @@ -100,7 +100,7 @@ def gui_run( # noqa: PLR0913, PLR0917 class GUILocalFilePicker: """Local File Picker dialog class that lazy-loads NiceGUI dependencies.""" - def __new__( # noqa: C901 + def __new__( # noqa: C901, PYI034 cls, directory: str, *, @@ -159,7 +159,8 @@ def __init__( 
with self, ui.card(): self.add_drives_toggle() self.grid = ( - ui.aggrid( + ui + .aggrid( { "columnDefs": [{"field": "name", "headerName": "File"}], "rowSelection": "multiple" if multiple else "single", diff --git a/src/aignostics/utils/_service.py b/src/aignostics/utils/_service.py index e9d5eb5ef..6bfb31147 100644 --- a/src/aignostics/utils/_service.py +++ b/src/aignostics/utils/_service.py @@ -61,8 +61,7 @@ async def endpoint(service: Annotated[Service, Depends(Service.get_service())]): return cast("Callable[[], Generator[Self]]", cached) def dependency() -> Generator[Self]: - service = cls() - yield service + yield cls() setattr(cls, cache_attr, dependency) return dependency diff --git a/src/aignostics/wsi/CLAUDE.md b/src/aignostics/wsi/CLAUDE.md index 5a08cb10b..2443597b1 100644 --- a/src/aignostics/wsi/CLAUDE.md +++ b/src/aignostics/wsi/CLAUDE.md @@ -57,11 +57,13 @@ class WSIHandler(ABC): def get_metadata(self, wsi: WSIFile) -> dict: pass + # OpenSlide handler for .svs, .tiff, .ndpi class OpenSlideHandler(WSIHandler): def open(self, path: Path) -> OpenSlide: return openslide.OpenSlide(str(path)) + # PyDICOM handler for DICOM files class PyDICOMHandler(WSIHandler): def open(self, path: Path) -> Dataset: @@ -72,19 +74,20 @@ class PyDICOMHandler(WSIHandler): ```python WSI_SUPPORTED_FILE_EXTENSIONS = { - ".svs", # Aperio - ".tiff", # Generic TIFF - ".tif", # Generic TIFF - ".ndpi", # Hamamatsu - ".vms", # Hamamatsu - ".vmu", # Hamamatsu - ".scn", # Leica - ".mrxs", # MIRAX - ".bif", # Ventana - ".dcm", # DICOM - ".dicom" # DICOM + ".svs", # Aperio + ".tiff", # Generic TIFF + ".tif", # Generic TIFF + ".ndpi", # Hamamatsu + ".vms", # Hamamatsu + ".vmu", # Hamamatsu + ".scn", # Leica + ".mrxs", # MIRAX + ".bif", # Ventana + ".dcm", # DICOM + ".dicom", # DICOM } + def get_handler(file_path: Path) -> WSIHandler: """Get appropriate handler based on file extension.""" ext = file_path.suffix.lower() @@ -143,11 +146,7 @@ def get_metadata(self, wsi_path: Path) -> dict: 
"""Extract all available metadata.""" handler = self.get_handler(wsi_path) - metadata = { - "file_path": str(wsi_path), - "file_size": wsi_path.stat().st_size, - "format": wsi_path.suffix.lower() - } + metadata = {"file_path": str(wsi_path), "file_size": wsi_path.stat().st_size, "format": wsi_path.suffix.lower()} if isinstance(handler, OpenSlideHandler): slide = handler.open(wsi_path) @@ -157,7 +156,7 @@ def get_metadata(self, wsi_path: Path) -> dict: "level_dimensions": slide.level_dimensions, "level_downsamples": slide.level_downsamples, "properties": dict(slide.properties), - "vendor": slide.properties.get("openslide.vendor", "Unknown") + "vendor": slide.properties.get("openslide.vendor", "Unknown"), }) elif isinstance(handler, PyDICOMHandler): @@ -169,7 +168,7 @@ def get_metadata(self, wsi_path: Path) -> dict: "rows": ds.get("Rows", 0), "columns": ds.get("Columns", 0), "number_of_frames": ds.get("NumberOfFrames", 1), - "photometric_interpretation": str(ds.get("PhotometricInterpretation", "")) + "photometric_interpretation": str(ds.get("PhotometricInterpretation", "")), }) return metadata @@ -180,15 +179,7 @@ def get_metadata(self, wsi_path: Path) -> dict: **Region of Interest Extraction:** ```python -def get_tile( - self, - wsi_path: Path, - x: int, - y: int, - width: int, - height: int, - level: int = 0 -) -> Image: +def get_tile(self, wsi_path: Path, x: int, y: int, width: int, height: int, level: int = 0) -> Image: """Extract tile from WSI at specified coordinates.""" handler = self.get_handler(wsi_path) @@ -227,17 +218,11 @@ from aignostics.wsi import Service from pathlib import Path # Get filtered DICOM files -files = Service.get_wsi_files_to_process( - path=Path("/data/dicoms"), - extension=".dcm" -) +files = Service.get_wsi_files_to_process(path=Path("/data/dicoms"), extension=".dcm") # Returns only highest resolution WSI files # For non-DICOM formats, returns all files -tiff_files = Service.get_wsi_files_to_process( - path=Path("/data/slides"), - 
extension=".tiff" -) +tiff_files = Service.get_wsi_files_to_process(path=Path("/data/slides"), extension=".tiff") # Returns all .tiff files (no filtering) ``` @@ -371,12 +356,7 @@ if service.is_supported_format(wsi_path): ```python # Extract specific region -tile = service.get_tile( - wsi_path, - x=1000, y=2000, - width=512, height=512, - level=0 -) +tile = service.get_tile(wsi_path, x=1000, y=2000, width=512, height=512, level=0) tile.save("tile.jpg") # Process multiple WSI files @@ -543,6 +523,7 @@ def sample_wsi(): """Provide sample WSI for testing.""" return Path("tests/fixtures/sample.svs") + def test_thumbnail_generation(sample_wsi): """Test thumbnail generation.""" service = Service() @@ -551,6 +532,7 @@ def test_thumbnail_generation(sample_wsi): assert thumbnail.size == (256, 256) assert thumbnail.mode in ["RGB", "RGBA"] + def test_metadata_extraction(sample_wsi): """Test metadata extraction.""" service = Service() @@ -595,6 +577,7 @@ def test_dicom_processing(): import cProfile import pstats + def profile_wsi_processing(): """Profile WSI processing performance.""" @@ -607,7 +590,7 @@ def profile_wsi_processing(): profiler.disable() stats = pstats.Stats(profiler) - stats.sort_stats('cumulative') + stats.sort_stats("cumulative") stats.print_stats(20) ``` diff --git a/src/aignostics/wsi/_openslide_handler.py b/src/aignostics/wsi/_openslide_handler.py index f118f6c96..132b220f9 100644 --- a/src/aignostics/wsi/_openslide_handler.py +++ b/src/aignostics/wsi/_openslide_handler.py @@ -1,7 +1,7 @@ """Handler for wsi files using OpenSlide.""" from pathlib import Path -from typing import Any +from typing import Any, Self import defusedxml.ElementTree as ET # noqa: N817 import openslide @@ -266,7 +266,7 @@ def close(self) -> None: """Close the OpenSlide object.""" self.slide.close() - def __enter__(self) -> "OpenSlideHandler": + def __enter__(self) -> Self: return self def __exit__( diff --git a/src/aignostics/wsi/_pydicom_handler.py 
b/src/aignostics/wsi/_pydicom_handler.py index dbdcbfe2f..2d9238255 100644 --- a/src/aignostics/wsi/_pydicom_handler.py +++ b/src/aignostics/wsi/_pydicom_handler.py @@ -3,7 +3,7 @@ import json from collections import defaultdict from pathlib import Path -from typing import Any +from typing import Any, Self import highdicom as hd import numpy as np @@ -448,7 +448,7 @@ def _organize_by_hierarchy(files: list[dict[str, Any]]) -> dict[str, Any]: return {"type": "root", "studies": studies} - def __enter__(self) -> "PydicomHandler": + def __enter__(self) -> Self: return self def __exit__(self, exc_type, exc_val, exc_tb) -> None: # noqa: ANN001 diff --git a/tests/CLAUDE.md b/tests/CLAUDE.md index ad8a368ae..28671793e 100644 --- a/tests/CLAUDE.md +++ b/tests/CLAUDE.md @@ -47,6 +47,7 @@ def mock_settings(): mock.return_value = settings yield mock + @pytest.fixture(autouse=True) def mock_can_open_browser(): """Prevent browser opening in CI/CD.""" @@ -78,17 +79,12 @@ def test_token_refresh_timing(): ```python def test_application_version_formats(): """Test all valid and invalid semver formats.""" - valid = [ - "1.0.0", - "1.0.0-alpha", - "1.0.0+meta", - "1.0.0-rc.1+meta" - ] + valid = ["1.0.0", "1.0.0-alpha", "1.0.0+meta", "1.0.0-rc.1+meta"] invalid = [ - "v1.0.0", # 'v' prefix not allowed - "1.0", # Incomplete - "", # Empty string + "v1.0.0", # 'v' prefix not allowed + "1.0", # Incomplete + "", # Empty string ] for v in valid: @@ -159,7 +155,7 @@ class TestBuildSdkMetadata: # Set GitHub Actions environment variables os.environ["GITHUB_RUN_ID"] = "12345" os.environ["GITHUB_REPOSITORY"] = "aignostics/python-sdk" - os.environ["GITHUB_SHA"] = "abc123def456" # pragma: allowlist secret + os.environ["GITHUB_SHA"] = "abc123def456" # pragma: allowlist secret os.environ["GITHUB_REF"] = "refs/heads/main" os.environ["GITHUB_WORKFLOW"] = "CI/CD" @@ -170,10 +166,8 @@ class TestBuildSdkMetadata: assert "github" in metadata["ci"] assert metadata["ci"]["github"]["run_id"] == "12345" assert 
metadata["ci"]["github"]["repository"] == "aignostics/python-sdk" - assert metadata["ci"]["github"]["sha"] == "abc123def456" # pragma: allowlist secret - assert metadata["ci"]["github"]["run_url"] == ( - "https://github.com/aignostics/python-sdk/actions/runs/12345" - ) + assert metadata["ci"]["github"]["sha"] == "abc123def456" # pragma: allowlist secret + assert metadata["ci"]["github"]["run_url"] == ("https://github.com/aignostics/python-sdk/actions/runs/12345") def test_build_metadata_with_pytest(clean_env: None) -> None: """Test metadata with pytest environment.""" @@ -185,9 +179,7 @@ class TestBuildSdkMetadata: # Pytest CI metadata should be present assert "ci" in metadata assert "pytest" in metadata["ci"] - assert metadata["ci"]["pytest"]["current_test"] == ( - "tests/platform/sdk_metadata_test.py::test_foo" - ) + assert metadata["ci"]["pytest"]["current_test"] == ("tests/platform/sdk_metadata_test.py::test_foo") assert metadata["ci"]["pytest"]["markers"] == ["unit", "sequential"] def test_interface_detection_cli(clean_env: None) -> None: @@ -236,7 +228,7 @@ class TestValidateSdkMetadata: "interface": "script", "source": "user", }, - "user_agent": "test/1.0.0" + "user_agent": "test/1.0.0", } with pytest.raises(ValidationError): @@ -253,7 +245,7 @@ class TestValidateSdkMetadata: "interface": "invalid_interface", # Invalid enum value "source": "user", }, - "user_agent": "test/1.0.0" + "user_agent": "test/1.0.0", } with pytest.raises(ValidationError): @@ -283,9 +275,8 @@ class TestGetSdkMetadataJsonSchema: assert "$id" in schema assert ( - schema["$id"] - == f"https://raw.githubusercontent.com/aignostics/python-sdk/main/" - f"docs/source/_static/sdk_metadata_schema_v{SDK_METADATA_SCHEMA_VERSION}.json" + schema["$id"] == f"https://raw.githubusercontent.com/aignostics/python-sdk/main/" + f"docs/source/_static/sdk_metadata_schema_v{SDK_METADATA_SCHEMA_VERSION}.json" ) assert "properties" in schema @@ -451,9 +442,7 @@ class TestNocacheDecoratorBehavior: class 
TestClientMeNocache: """Test nocache parameter for Client.me() method.""" - def test_me_default_uses_cache( - client_with_mock_api: Client, mock_api_client: MagicMock - ) -> None: + def test_me_default_uses_cache(client_with_mock_api: Client, mock_api_client: MagicMock) -> None: """Verify me() uses cache by default.""" mock_me_response = {"user_id": "test-user", "org_id": "test-org"} mock_api_client.get_me_v1_me_get.return_value = mock_me_response @@ -468,9 +457,7 @@ class TestClientMeNocache: assert result2 == mock_me_response assert mock_api_client.get_me_v1_me_get.call_count == 1 # No additional call - def test_me_nocache_true_fetches_fresh_data( - client_with_mock_api: Client, mock_api_client: MagicMock - ) -> None: + def test_me_nocache_true_fetches_fresh_data(client_with_mock_api: Client, mock_api_client: MagicMock) -> None: """Verify me(nocache=True) fetches fresh data.""" mock_me_response_1 = {"user_id": "user-1"} mock_me_response_2 = {"user_id": "user-2"} @@ -488,9 +475,7 @@ class TestClientMeNocache: assert result2 == mock_me_response_2 assert mock_api_client.get_me_v1_me_get.call_count == 2 # Additional call made - def test_me_nocache_true_updates_cache( - client_with_mock_api: Client, mock_api_client: MagicMock - ) -> None: + def test_me_nocache_true_updates_cache(client_with_mock_api: Client, mock_api_client: MagicMock) -> None: """Verify me(nocache=True) updates cache with fresh data.""" mock_me_response_1 = {"user_id": "user-1"} mock_me_response_2 = {"user_id": "user-2"} @@ -520,6 +505,7 @@ class TestNocacheEdgeCases: def test_nocache_with_expired_cache_entry() -> None: """Test nocache behavior when cache entry expired.""" + @cached_operation(ttl=1, use_token=False) # 1 second TTL def test_func() -> int: return time.time_ns() @@ -625,7 +611,7 @@ def test_cleanup_processes_terminates_running(): def test_pagination_generator(): """Verify pagination doesn't materialize full result set.""" page1 = [Mock(id=f"run-{i}") for i in range(50)] - page2 = 
[Mock(id=f"run-{i+50}") for i in range(5)] + page2 = [Mock(id=f"run-{i + 50}") for i in range(5)] mock_api.list_runs.side_effect = [page1, page2] result_gen = runs.list() # Generator, not list @@ -657,8 +643,8 @@ def qupath_teardown(): """Ensure QuPath processes cleaned up.""" yield # Kill any remaining QuPath processes - for proc in psutil.process_iter(['name']): - if 'QuPath' in proc.info['name']: + for proc in psutil.process_iter(["name"]): + if "QuPath" in proc.info["name"]: proc.terminate() proc.wait(timeout=5) ``` @@ -720,6 +706,7 @@ def mock_api(): api.list_applications.return_value = [...] return api + @pytest.fixture def mock_client(mock_api): """Mock platform Client.""" @@ -744,12 +731,7 @@ def mock_wsi_file(tmp_path): ```python @responses.activate def test_api_call(): - responses.add( - responses.GET, - "https://api.aignostics.com/v1/runs", - json={"runs": []}, - status=200 - ) + responses.add(responses.GET, "https://api.aignostics.com/v1/runs", json={"runs": []}, status=200) ``` ## Test Coverage Requirements @@ -800,6 +782,7 @@ def test_concurrent_runs(): def test_memory_usage(): """Verify no memory leaks in long operations.""" import tracemalloc + tracemalloc.start() # Run operations @@ -822,9 +805,7 @@ class TestPlatformIntegration: """Start mock platform API.""" docker_services.start("platform-mock") docker_services.wait_until_responsive( - check=lambda: requests.get("http://localhost:8080/health"), - timeout=30.0, - pause=0.5 + check=lambda: requests.get("http://localhost:8080/health"), timeout=30.0, pause=0.5 ) def test_full_workflow(self, platform_container): @@ -907,11 +888,14 @@ def create_test_wsi(size_mb: int = 10) -> Path: ### Parameterized Testing ```python -@pytest.mark.parametrize("version,expected", [ - ("v1.0.0", True), - ("1.0.0", False), - ("v1.0", False), -]) +@pytest.mark.parametrize( + "version,expected", + [ + ("v1.0.0", True), + ("1.0.0", False), + ("v1.0", False), + ], +) def test_version_validation(version, expected): assert 
is_valid_semver(version) == expected ``` @@ -1029,7 +1013,9 @@ pytest -k "token" # Enable breakpoint in test def test_complex_logic(): result = complex_function() - import pdb; pdb.set_trace() # Breakpoint + import pdb + + pdb.set_trace() # Breakpoint assert result.status == "success" ``` diff --git a/tests/aignostics/application/cli_pipeline_validation_test.py b/tests/aignostics/application/cli_pipeline_validation_test.py index 9e49b85d9..86b891b1c 100644 --- a/tests/aignostics/application/cli_pipeline_validation_test.py +++ b/tests/aignostics/application/cli_pipeline_validation_test.py @@ -31,7 +31,7 @@ def test_cli_run_submit_fails_on_invalid_gpu_type(runner: CliRunner, tmp_path: P HETA_APPLICATION_ID, str(csv_path), "--deadline", - (datetime.now(tz=UTC) + timedelta(seconds=5)).isoformat(), + (datetime.now(tz=UTC) + timedelta(minutes=5)).isoformat(), "--gpu-type", "INVALID_GPU", "--tags", @@ -65,7 +65,7 @@ def test_cli_run_submit_fails_on_invalid_gpu_provisioning_mode(runner: CliRunner HETA_APPLICATION_ID, str(csv_path), "--deadline", - (datetime.now(tz=UTC) + timedelta(seconds=5)).isoformat(), + (datetime.now(tz=UTC) + timedelta(minutes=5)).isoformat(), "--gpu-provisioning-mode", "INVALID_MODE", "--tags", @@ -99,7 +99,7 @@ def test_cli_run_submit_fails_on_invalid_cpu_provisioning_mode(runner: CliRunner HETA_APPLICATION_ID, str(csv_path), "--deadline", - (datetime.now(tz=UTC) + timedelta(seconds=5)).isoformat(), + (datetime.now(tz=UTC) + timedelta(minutes=5)).isoformat(), "--cpu-provisioning-mode", "RESERVED", "--tags", @@ -133,7 +133,7 @@ def test_cli_run_submit_fails_on_max_gpus_per_slide_zero(runner: CliRunner, tmp_ HETA_APPLICATION_ID, str(csv_path), "--deadline", - (datetime.now(tz=UTC) + timedelta(seconds=5)).isoformat(), + (datetime.now(tz=UTC) + timedelta(minutes=5)).isoformat(), "--max-gpus-per-slide", "0", "--tags", @@ -167,7 +167,7 @@ def test_cli_run_submit_fails_on_max_gpus_per_slide_too_high(runner: CliRunner, HETA_APPLICATION_ID, str(csv_path), 
"--deadline", - (datetime.now(tz=UTC) + timedelta(seconds=5)).isoformat(), + (datetime.now(tz=UTC) + timedelta(minutes=5)).isoformat(), "--max-gpus-per-slide", "9", "--tags", diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index ab68010e3..3bb6448f3 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -1148,6 +1148,7 @@ def test_cli_run_execute_invalid_mapping_format(runner: CliRunner, tmp_path: Pat str(tmp_path), "--mapping", ".*\\.tiff:staining_method:H&E", # Wrong: colon instead of equals + "--force", # Skip health check; we're testing argument validation only ], ) assert result.exit_code != 0 @@ -1169,6 +1170,7 @@ def test_cli_run_execute_invalid_regex_pattern(runner: CliRunner, tmp_path: Path str(tmp_path), "--mapping", "*.tiff:staining_method=H&E", # Wrong: glob pattern, not regex + "--force", # Skip health check; we're testing argument validation only ], ) assert result.exit_code != 0 diff --git a/tests/aignostics/application/download_test.py b/tests/aignostics/application/download_test.py index e5537ef62..f8fee1adb 100644 --- a/tests/aignostics/application/download_test.py +++ b/tests/aignostics/application/download_test.py @@ -1,13 +1,19 @@ """Tests for download utility functions in the application module.""" from pathlib import Path -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch import pytest import requests -from aignostics.application._download import download_url_to_file_with_progress, extract_filename_from_url +from aignostics.application._download import ( + download_available_items, + download_item_artifact, + download_url_to_file_with_progress, + extract_filename_from_url, +) from aignostics.application._models import DownloadProgress, DownloadProgressState +from aignostics.platform import ArtifactOutput @pytest.mark.unit @@ -397,3 +403,189 @@ def progress_callback(p: DownloadProgress) -> None: # Verify 
direct URL was used (no signed URL generation) mock_get.assert_called_once_with(https_url, stream=True, timeout=60) + + +# --------------------------------------------------------------------------- +# download_item_artifact / download_available_items: AVAILABLE-gating + URL flow +# --------------------------------------------------------------------------- + + +_PRESIGNED_URL = "https://storage.googleapis.com/bucket/file?sig=abc" +# Patch _download.get_file_extension_for_artifact (NOT _utils.*) — the function +# is imported by name into _download, so re-binding it on _utils does nothing. +# Copilot called this out on PR #478 (comments #3 + #4). +_PATCH_GET_EXT = "aignostics.application._download.get_file_extension_for_artifact" +_PATCH_DOWNLOAD_FILE_WITH_PROGRESS = "aignostics.application._download.download_file_with_progress" + + +def _mock_artifact( + *, + output_artifact_id: str = "art-1", + name: str = "result", + output: ArtifactOutput = ArtifactOutput.AVAILABLE, + metadata: dict | None = None, +) -> MagicMock: + """Build a mock OutputArtifactResultReadResponse for tests.""" + artifact = MagicMock() + artifact.output_artifact_id = output_artifact_id + artifact.name = name + artifact.output = output + artifact.metadata = metadata if metadata is not None else {"checksum_base64_crc32c": "AAAA"} + return artifact + + +@pytest.mark.unit +def test_download_item_artifact_resolves_fresh_url_per_call(tmp_path: Path) -> None: + """download_item_artifact must call run.get_artifact_download_url(artifact_id). + + The deprecated artifact.download_url field is no longer consulted; every + download resolves a fresh, short-lived URL via the /file endpoint. This + test pins that behavior. 
+ """ + artifact = _mock_artifact() + run = MagicMock() + run.get_artifact_download_url.return_value = _PRESIGNED_URL + + with ( + patch(_PATCH_GET_EXT, return_value=".csv"), + patch(_PATCH_DOWNLOAD_FILE_WITH_PROGRESS) as mock_download, + ): + download_item_artifact( + progress=DownloadProgress(), + run=run, + artifact=artifact, + destination_directory=tmp_path, + ) + + run.get_artifact_download_url.assert_called_once_with("art-1") + # download_file_with_progress was handed the fresh URL, not anything from artifact + mock_download.assert_called_once() + assert mock_download.call_args.args[1] == _PRESIGNED_URL + + +@pytest.mark.unit +def test_download_item_artifact_skips_when_local_checksum_matches(tmp_path: Path) -> None: + """If the artifact already exists locally with the right checksum, skip. + + Critical: do NOT call run.get_artifact_download_url in this branch — the + presigned URL request hits SAMIA, and skipping it shortens resume cycles + and reduces backend load. + """ + import base64 + + import crc32c as crc32c_lib # local import keeps the test name space tight + + content = b"hello, slide" + artifact_path = tmp_path / "result.csv" + artifact_path.write_bytes(content) + + h = crc32c_lib.CRC32CHash() + h.update(content) + correct_checksum = base64.b64encode(h.digest()).decode("ascii") + + artifact = _mock_artifact(metadata={"checksum_base64_crc32c": correct_checksum}) + run = MagicMock() + + with ( + patch(_PATCH_GET_EXT, return_value=".csv"), + patch(_PATCH_DOWNLOAD_FILE_WITH_PROGRESS) as mock_download, + ): + download_item_artifact( + progress=DownloadProgress(), + run=run, + artifact=artifact, + destination_directory=tmp_path, + ) + + run.get_artifact_download_url.assert_not_called() + mock_download.assert_not_called() + + +@pytest.mark.unit +def test_download_item_artifact_raises_when_no_checksum(tmp_path: Path) -> None: + """Empty metadata -> ValueError, before any URL is requested.""" + artifact = _mock_artifact(metadata={}) + run = MagicMock() + + 
with pytest.raises(ValueError, match="No checksum metadata"): + download_item_artifact( + progress=DownloadProgress(), + run=run, + artifact=artifact, + destination_directory=tmp_path, + ) + + run.get_artifact_download_url.assert_not_called() + + +@pytest.mark.unit +def test_download_available_items_skips_non_available_artifacts(tmp_path: Path) -> None: + """Artifacts with output != AVAILABLE are skipped. + + Per Dima on PR #478: the /file endpoint does NOT return a presigned URL for + artifacts that aren't AVAILABLE. Calling it for a NONE artifact would fail + the whole download. This test pins the guard. + """ + from aignostics.platform import ItemOutput, ItemState + + available = _mock_artifact(output_artifact_id="art-ok", output=ArtifactOutput.AVAILABLE) + none_artifact = _mock_artifact(output_artifact_id="art-skip", output=ArtifactOutput.NONE) + + item = MagicMock() + item.external_id = "slide-1" + item.state = ItemState.TERMINATED + item.output = ItemOutput.FULL + item.output_artifacts = [available, none_artifact] + + run = MagicMock() + run.run_id = "run-xyz" + run.results.return_value = [item] + + with patch("aignostics.application._download.download_item_artifact") as mock_dia: + download_available_items( + progress=DownloadProgress(), + application_run=run, + destination_directory=tmp_path, + downloaded_items=set(), + ) + + # Only the AVAILABLE artifact triggered a download + assert mock_dia.call_count == 1 + forwarded_artifact = mock_dia.call_args.args[2] + assert forwarded_artifact.output_artifact_id == "art-ok" + + +@pytest.mark.unit +def test_download_available_items_passes_run_to_download_item_artifact(tmp_path: Path) -> None: + """download_item_artifact is called with the Run instance as the second positional arg. + + download_item_artifact needs the Run handle to call get_artifact_download_url, + so the calling site must pass it through. Pinning the call shape keeps the + contract explicit. 
+ """ + from aignostics.platform import ItemOutput, ItemState + + artifact = _mock_artifact() + item = MagicMock() + item.external_id = "slide-1" + item.state = ItemState.TERMINATED + item.output = ItemOutput.FULL + item.output_artifacts = [artifact] + + run = MagicMock() + run.run_id = "run-xyz" + run.results.return_value = [item] + + with patch("aignostics.application._download.download_item_artifact") as mock_dia: + download_available_items( + progress=DownloadProgress(), + application_run=run, + destination_directory=tmp_path, + downloaded_items=set(), + ) + + # Args order matches def download_item_artifact(progress, run, artifact, ...) + forwarded_progress, forwarded_run, forwarded_artifact, *_ = mock_dia.call_args.args + assert forwarded_run is run + assert forwarded_artifact is artifact + assert isinstance(forwarded_progress, DownloadProgress) diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index d944a7eed..59ba189e2 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -7,7 +7,7 @@ from datetime import UTC, datetime, timedelta from pathlib import Path from typing import TYPE_CHECKING -from unittest.mock import patch +from unittest.mock import AsyncMock, MagicMock, Mock, patch import pytest from nicegui.testing import User @@ -15,7 +15,11 @@ from aignostics import WSI_SUPPORTED_FILE_EXTENSIONS from aignostics.application import Service -from aignostics.application._gui._page_application_run_describe import RESULTS_PAGE_SIZE +from aignostics.application._gui._page_application_run_describe import ( + RESULTS_PAGE_SIZE, + _resolve_artifact_url_and_invoke, + _resolve_artifact_url_or_notify, +) from aignostics.cli import cli from tests.conftest import assert_notified, normalize_output, print_directory_structure from tests.constants_test import ( @@ -570,3 +574,150 @@ async def test_gui_run_results_pagination_show_more(user: User, silent_logging: else: # All items 
loaded - button should be hidden await user.should_not_see(marker="BUTTON_SHOW_MORE_RESULTS", retries=20) + + +# --------------------------------------------------------------------------- +# _resolve_artifact_url_or_notify — module-level GUI helper +# --------------------------------------------------------------------------- + +_PATCH_NICEGUI_RUN_IO_BOUND = "aignostics.application._gui._page_application_run_describe.nicegui_run.io_bound" +_PATCH_UI_NOTIFY = "aignostics.application._gui._page_application_run_describe.ui.notify" + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_or_notify_returns_url_on_success() -> None: + """Happy path: io_bound succeeds → helper returns the URL, no notify is shown.""" + fake_run = MagicMock() + fake_button = MagicMock() + presigned_url = "https://storage.example.com/file?sig=abc" + + with ( + patch(_PATCH_NICEGUI_RUN_IO_BOUND, new_callable=AsyncMock, return_value=presigned_url) as mock_io_bound, + patch(_PATCH_UI_NOTIFY) as mock_notify, + ): + result = await _resolve_artifact_url_or_notify(fake_run, "art-123", fake_button) + + assert result == presigned_url + mock_io_bound.assert_awaited_once_with(fake_run.get_artifact_download_url, "art-123") + mock_notify.assert_not_called() + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_or_notify_returns_none_and_notifies_on_exception() -> None: + """Failure path: io_bound raises → helper notifies user with warning, returns None. + + This is the principled-error-handling path for the GUI. Without it, the + NiceGUI click handler would surface the exception as a dev-console traceback, + not as a user-friendly notification — and the loading state would stay + forever stuck on the button. 
+ """ + fake_run = MagicMock() + fake_button = MagicMock() + + with ( + patch( + _PATCH_NICEGUI_RUN_IO_BOUND, + new_callable=AsyncMock, + side_effect=RuntimeError("SAMIA returned 503"), + ) as mock_io_bound, + patch(_PATCH_UI_NOTIFY) as mock_notify, + ): + result = await _resolve_artifact_url_or_notify(fake_run, "art-123", fake_button) + + assert result is None + mock_io_bound.assert_awaited_once() + mock_notify.assert_called_once() + # The notify call carries the failure detail and is a user-friendly warning. + notify_args, notify_kwargs = mock_notify.call_args + assert "SAMIA returned 503" in notify_args[0] + assert notify_kwargs["type"] == "warning" + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_or_notify_toggles_button_loading_state_on_success() -> None: + """The loading prop must be both added before and removed after a successful resolve. + + A button left in loading state after a successful URL fetch is a classic UI + bug — the user sees a spinner forever. The ``finally`` block in the helper + must run on both the success and the exception paths; this test pins the + success path; the next test pins the exception path. 
+ """ + fake_run = MagicMock() + fake_button = MagicMock() + + with ( + patch(_PATCH_NICEGUI_RUN_IO_BOUND, new_callable=AsyncMock, return_value="https://x"), + patch(_PATCH_UI_NOTIFY), + ): + await _resolve_artifact_url_or_notify(fake_run, "art-1", fake_button) + + fake_button.props.assert_any_call(add="loading") + fake_button.props.assert_any_call(remove="loading") + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_or_notify_toggles_button_loading_state_on_exception() -> None: + """The loading prop must be removed even when the URL resolve raises.""" + fake_run = MagicMock() + fake_button = MagicMock() + + with ( + patch(_PATCH_NICEGUI_RUN_IO_BOUND, new_callable=AsyncMock, side_effect=RuntimeError("boom")), + patch(_PATCH_UI_NOTIFY), + ): + await _resolve_artifact_url_or_notify(fake_run, "art-1", fake_button) + + fake_button.props.assert_any_call(add="loading") + fake_button.props.assert_any_call(remove="loading") + + +# --------------------------------------------------------------------------- +# _resolve_artifact_url_and_invoke — composition helper used by every per-artifact button +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_and_invoke_calls_on_success_with_url() -> None: + """When URL resolution succeeds, on_success is invoked exactly once with the URL. + + This is the composition path used by every per-artifact button in the run + page (TIFF preview, CSV preview, browser download). Pinning the call shape + means a future refactor cannot accidentally pass the wrong argument or + skip the success branch. 
+ """ + fake_run = MagicMock() + fake_button = MagicMock() + on_success = Mock() + presigned_url = "https://storage.example.com/file?sig=xyz" + + with patch(_PATCH_NICEGUI_RUN_IO_BOUND, new_callable=AsyncMock, return_value=presigned_url): + await _resolve_artifact_url_and_invoke(fake_run, "art-1", fake_button, on_success) + + on_success.assert_called_once_with(presigned_url) + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_resolve_artifact_url_and_invoke_short_circuits_on_resolution_failure() -> None: + """When URL resolution fails, on_success must NOT be called. + + The user has already been notified via ui.notify by the inner helper; + invoking on_success with None would either crash (e.g. webbrowser.open(None)) + or open a dialog with no content. Pinning the short-circuit. + """ + fake_run = MagicMock() + fake_button = MagicMock() + on_success = Mock() + + with ( + patch(_PATCH_NICEGUI_RUN_IO_BOUND, new_callable=AsyncMock, side_effect=RuntimeError("nope")), + patch(_PATCH_UI_NOTIFY), # notify is called but we don't assert on it here + ): + await _resolve_artifact_url_and_invoke(fake_run, "art-1", fake_button, on_success) + + on_success.assert_not_called() diff --git a/tests/aignostics/application/utils_test.py b/tests/aignostics/application/utils_test.py index 5466cf9e8..049b73d31 100644 --- a/tests/aignostics/application/utils_test.py +++ b/tests/aignostics/application/utils_test.py @@ -974,9 +974,12 @@ def test_retrieve_and_print_run_details_default_is_detailed(mock_console: Mock) # Verify detailed output shows "Run Details" not "Run Summary" assert "Run Details for run-detailed-test" in all_output - # Verify artifact details ARE shown in detailed mode - assert "Download URL" in all_output + # Verify artifact details ARE shown in detailed mode. + # Download URL is no longer printed: presigned URLs are short-lived and + # are now resolved on-demand via Run.get_artifact_download_url(). 
+ assert "Download URL" not in all_output assert "Artifact ID" in all_output + assert "MIME Type" in all_output @pytest.mark.unit diff --git a/tests/aignostics/dataset/gui_test.py b/tests/aignostics/dataset/gui_test.py index 99e079482..b48bee231 100644 --- a/tests/aignostics/dataset/gui_test.py +++ b/tests/aignostics/dataset/gui_test.py @@ -123,8 +123,10 @@ async def test_gui_idc_download_fails_with_no_inputs( # noqa: PLR0913, PLR0917 [ ( "4711", - "Download failed: None of the values passed matched any of the identifiers: " - "collection_id, PatientID, StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID.", + ( + "Download failed: None of the values passed matched any of the identifiers: " + "collection_id, PatientID, StudyInstanceUID, SeriesInstanceUID, SOPInstanceUID." + ), ), ], ) diff --git a/tests/aignostics/notebook/service_test.py b/tests/aignostics/notebook/service_test.py index a353a3441..25ea7b032 100644 --- a/tests/aignostics/notebook/service_test.py +++ b/tests/aignostics/notebook/service_test.py @@ -6,8 +6,6 @@ from unittest.mock import MagicMock, patch import pytest -from fastapi.testclient import TestClient -from nicegui import app from nicegui.testing import User from aignostics.notebook._service import MARIMO_SERVER_STARTUP_TIMEOUT, Service, _get_runner, _Runner @@ -99,7 +97,7 @@ def test_notebook_start_and_stop(caplog: pytest.LogCaptureFixture) -> None: @pytest.mark.flaky(retries=1, delay=5, only_on=[AssertionError]) @pytest.mark.sequential @pytest.mark.timeout(timeout=60 * 2) -def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None: +async def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None: """Test notebook serving. 
Args: @@ -112,10 +110,8 @@ def test_serve_notebook(user: User, caplog: pytest.LogCaptureFixture) -> None: # Set up logging to capture DEBUG level and above caplog.set_level(logging.DEBUG) - client = TestClient(app) - try: - response = client.get("/notebook/4711?results_folder=/tmp", timeout=60) + response = await user.http_client.get("/notebook/4711?results_folder=/tmp", timeout=60) assert response.status_code == 200 content = response.content.decode("utf-8") assert "iframe" in content diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index ac2cf8924..634e85da0 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -725,6 +725,128 @@ def test_platform_heta_app_find_and_validate() -> None: ) +_CANARY_MAX_RUNS_TO_INSPECT = 20 + + +def _find_available_output_artifact_id(run: Run) -> str | None: + """Return the first AVAILABLE output_artifact_id from a successful run, else None. + + Single-run scan helper for the /file endpoint canary. Walks the run's + items and per-item artifacts and returns the first artifact whose output + is ``AVAILABLE`` and whose ``output_artifact_id`` is non-empty. Returns + ``None`` when the run produced no usable artifact (e.g. all items + terminated with ``NONE`` output). The caller is expected to have already + filtered for runs that are ``TERMINATED`` with ``FULL`` output. + """ + for item in run.results(nocache=True): + if item.state is not ItemState.TERMINATED or item.output is not ItemOutput.FULL: + continue + for art in item.output_artifacts: + if art.output is ArtifactOutput.AVAILABLE and art.output_artifact_id: + return art.output_artifact_id + return None + + +def _find_available_artifact_in_recent_heta_run() -> tuple[Run, str] | None: + """Find one AVAILABLE output artifact from a recent successful HETA run. + + Helper for the /file endpoint canary below. 
Iterates the most recent HETA + runs tagged ``scheduled`` lazily — only inspects up to + ``_CANARY_MAX_RUNS_TO_INSPECT`` runs and stops at the first hit. Returns + ``None`` when no such artifact is reachable in the inspected window — the + canary skips in that case rather than fails. + + The cap exists because, as staging accumulates scheduled runs, materializing + the full list and querying details/results for every one risks the canary's + 60s timeout. Runs from ``client.runs.list`` are already returned newest-first + so capping is biased toward recent data. + + Returns: + tuple[Run, str] | None: A bound (Run, output_artifact_id) pair, or None. + """ + import itertools + + client = platform.Client() + # client.runs.list yields Run handles directly; iterate lazily and cap to N + # so the canary stays well under its 60s timeout even on a busy staging env. + candidate_runs = itertools.islice( + client.runs.list( + application_id=HETA_APPLICATION_ID, + application_version=HETA_APPLICATION_VERSION, + custom_metadata='$.sdk.tags[*] ? (@ == "scheduled")', + ), + _CANARY_MAX_RUNS_TO_INSPECT, + ) + for run in candidate_runs: + details = run.details(nocache=True) + if details.state is not RunState.TERMINATED or details.output is not RunOutput.FULL: + continue + artifact_id = _find_available_output_artifact_id(run) + if artifact_id is not None: + return run, artifact_id + return None + + +@pytest.mark.e2e +@pytest.mark.timeout(timeout=60) +def test_platform_artifact_file_endpoint_resolves_to_working_url(record_property) -> None: + """Smoke-test the SAMIA /file endpoint at PR time, not 6h later. + + The full ``_validate_output`` flow that exercises the new endpoint is + ``scheduled_only``, so it never runs on PRs. That leaves a window in which + a SAMIA-side regression to ``/api/v1/runs/{run_id}/artifacts/{artifact_id}/file`` + (think PAPI-4868: the auth bug that forced reverting #507) ships green + through PR CI and is only caught by the next scheduled run, up to 6h later. 
+ + This canary closes that window: at PR time it picks any recent successful + HETA run, resolves a presigned URL through ``Run.get_artifact_download_url`` + against real SAMIA, and ``HEAD``-checks the URL to confirm the storage backend + serves it. The whole roundtrip is well under 60s, so the test stays out of + ``long_running`` and runs on every PR that doesn't carry ``skip:test:e2e``. + + Failure modes covered end-to-end: + + - ``/file`` endpoint 5xx / 4xx (auth, missing run, etc.) → typed exception + - ``/file`` returns 3xx with an empty Location header → ``RuntimeError`` + - ``/file`` returns a Location URL that storage rejects → ``HEAD`` fails + - ``/file`` returns a Location URL pointing at the wrong scope → ``HEAD`` 403 + + Skips when no recent successful HETA run exists (e.g. the first time the + scheduled suite has been run in a fresh staging environment) — the test + is a canary, not a fixture provider, so a missing prerequisite is not a + failure. + + Raises: + AssertionError: If the resolved URL is empty or the HEAD request fails. + """ + record_property("tested-item-id", "SPEC-PLATFORM-SERVICE") + + # Local import: requests is already an SDK dep; importing at use-site avoids + # adding to the module-level imports of an otherwise import-heavy test file. + import requests + + found = _find_available_artifact_in_recent_heta_run() + if found is None: + pytest.skip( + f"No AVAILABLE output artifact in any recent HETA run " + f"(version {HETA_APPLICATION_VERSION}) tagged 'scheduled'; " + "this canary needs one from the scheduled suite to exercise the /file endpoint." 
+ ) + selected_run, artifact_id = found + + presigned_url = selected_run.get_artifact_download_url(artifact_id) + assert presigned_url, ( + f"SAMIA /file endpoint returned an empty/falsy URL for run={selected_run.run_id} artifact={artifact_id}" + ) + + head_response = requests.head(presigned_url, allow_redirects=True, timeout=30) + assert head_response.ok, ( + f"Presigned URL from /file endpoint did not serve content: " + f"{head_response.status_code} {head_response.reason} " + f"(run={selected_run.run_id} artifact={artifact_id})" + ) + + def _validate_output( application_run: Run, output_base_folder: Path, @@ -786,8 +908,10 @@ def _validate_output( f"Application run `{application_run.run_id}`: " f"artifact `{artifact}` should have output state `AVAILABLE`." ) - assert artifact.download_url is not None, ( - f"Application run `{application_run.run_id}`: artifact `{artifact}` should provide a download url." + artifact_download_url = application_run.get_artifact_download_url(artifact.output_artifact_id) + assert artifact_download_url, ( + f"Application run `{application_run.run_id}`: artifact `{artifact}` " + f"should resolve to a presigned download URL via the /file endpoint." 
) file_ending = platform.mime_type_to_file_ending(platform.get_mime_type_for_artifact(artifact)) file_path = item_dir / f"{artifact.name}{file_ending}" diff --git a/tests/aignostics/platform/resources/resource_utils_test.py b/tests/aignostics/platform/resources/resource_utils_test.py index b1d1d2657..ef602409f 100644 --- a/tests/aignostics/platform/resources/resource_utils_test.py +++ b/tests/aignostics/platform/resources/resource_utils_test.py @@ -94,6 +94,15 @@ def test_paginate_custom_page_size() -> None: mock_func.assert_called_once_with(page=1, page_size=custom_page_size) +@pytest.mark.unit +@pytest.mark.parametrize("page_size", [0, -1, -100]) +def test_paginate_raises_for_non_positive_page_size(page_size: int) -> None: + """Test that paginate raises ValueError when page_size is zero or negative.""" + mock_func = Mock() + with pytest.raises(ValueError, match="page_size must be a positive integer"): + list(paginate(mock_func, page_size=page_size)) + + @pytest.mark.unit def test_paginate_multiple_pages() -> None: """Test that paginate correctly iterates through multiple pages. diff --git a/tests/aignostics/platform/resources/runs_test.py b/tests/aignostics/platform/resources/runs_test.py index cb4749eda..1de38ebbf 100644 --- a/tests/aignostics/platform/resources/runs_test.py +++ b/tests/aignostics/platform/resources/runs_test.py @@ -4,10 +4,13 @@ verifying their functionality for listing, creating, and managing application runs. 
""" -from unittest.mock import Mock +from http import HTTPStatus +from unittest.mock import MagicMock, Mock, patch import pytest +import requests from aignx.codegen.api.public_api import PublicApi +from aignx.codegen.exceptions import ApiException, NotFoundException, ServiceException from aignx.codegen.models import ( InputArtifactCreationRequest, ItemCreationRequest, @@ -16,9 +19,17 @@ RunReadResponse, ) -from aignostics.platform.resources.runs import LIST_APPLICATION_RUNS_MAX_PAGE_SIZE, Run, Runs +from aignostics.platform.resources.runs import LIST_APPLICATION_RUNS_MAX_PAGE_SIZE, Artifact, Run, Runs from aignostics.platform.resources.utils import PAGE_SIZE +_PLATFORM_HOST = "https://platform-staging.aignostics.com" +_RUN_ID = "test-run-id" +_ARTIFACT_ID = "artifact-123" +_PRESIGNED_URL = "https://storage.googleapis.com/bucket/file?sig=abc123" +_PATCH_REQUESTS_GET = "aignostics.platform.resources.runs.requests.get" +_PATCH_GET_TOKEN = "aignostics.platform.resources.runs.get_token" # noqa: S105 +_PATCH_SETTINGS = "aignostics.platform.resources.runs.settings" + @pytest.fixture def mock_api() -> Mock: @@ -53,7 +64,55 @@ def app_run(mock_api) -> Run: Returns: Run: An Run instance using the mock API. """ - return Run(mock_api, "test-run-id") + return Run(mock_api, _RUN_ID) + + +@pytest.fixture +def configured_api(mock_api) -> Mock: + """Wire a Mock API client to expose a `configuration` matching real codegen shape. + + Returns: + Mock: The same `mock_api` fixture, with `api_client.configuration` + populated with `host`, `proxy`, `ssl_ca_cert`, `verify_ssl`, + and ``token_provider=None`` (so Artifact falls back to ``get_token``). + Tests that need to verify ``token_provider`` propagation should set + it explicitly. 
+ """ + mock_api.api_client = Mock() + mock_api.api_client.configuration.host = _PLATFORM_HOST + mock_api.api_client.configuration.proxy = None + mock_api.api_client.configuration.ssl_ca_cert = None + mock_api.api_client.configuration.verify_ssl = True + mock_api.api_client.configuration.token_provider = None + return mock_api + + +@pytest.fixture +def artifact(configured_api) -> Artifact: + """Create an Artifact instance bound to a configured mock API.""" + return Artifact(configured_api, _RUN_ID, _ARTIFACT_ID) + + +def _redirect_response(location: str | None, status: int = HTTPStatus.TEMPORARY_REDIRECT) -> MagicMock: + """Build a context-manager-shaped Mock response with the given status + Location.""" + response = MagicMock() + response.__enter__ = Mock(return_value=response) + response.__exit__ = Mock(return_value=False) + response.status_code = status + response.headers = {"Location": location} if location is not None else {} + response.reason = HTTPStatus(status).phrase or "Unknown" + return response + + +def _error_response(status: int) -> MagicMock: + """Build a context-manager-shaped Mock response with the given non-redirect status.""" + response = MagicMock() + response.__enter__ = Mock(return_value=response) + response.__exit__ = Mock(return_value=False) + response.status_code = status + response.headers = {} + response.reason = HTTPStatus(status).phrase + return response @pytest.mark.unit @@ -725,3 +784,462 @@ def test_run_details_does_not_retry_other_exceptions(app_run, mock_api) -> None: app_run.details() assert mock_api.get_run_v1_runs_run_id_get.call_count == 1 + + +# --------------------------------------------------------------------------- +# Artifact / Run.get_artifact_download_url +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +@pytest.mark.parametrize( + "redirect_status", + [ + HTTPStatus.MOVED_PERMANENTLY, + HTTPStatus.FOUND, + HTTPStatus.SEE_OTHER, + HTTPStatus.TEMPORARY_REDIRECT, + 
HTTPStatus.PERMANENT_REDIRECT, + ], +) +def test_artifact_get_download_url_returns_location_for_any_redirect(artifact, redirect_status) -> None: + """Any 3xx redirect status with a Location header yields the presigned URL. + + The /file endpoint contractually returns 307, but the SDK accepts every + well-known redirect status so the SDK keeps working if the API ever flips + one for cache reasons. 303 See Other is included per Copilot's PR review — + the back-end could legitimately switch to it for a POST→GET redirect shape. + """ + response = _redirect_response(_PRESIGNED_URL, status=redirect_status) + + with ( + patch(_PATCH_GET_TOKEN, return_value="test-token"), + patch(_PATCH_REQUESTS_GET, return_value=response) as mock_get, + ): + url = artifact.get_download_url() + + assert url == _PRESIGNED_URL + mock_get.assert_called_once() + assert mock_get.call_args.args[0] == f"{_PLATFORM_HOST}/api/v1/runs/{_RUN_ID}/artifacts/{_ARTIFACT_ID}/file" + assert mock_get.call_args.kwargs["allow_redirects"] is False + assert mock_get.call_args.kwargs["stream"] is True + assert mock_get.call_args.kwargs["headers"]["Authorization"] == "Bearer test-token" + assert "User-Agent" in mock_get.call_args.kwargs["headers"] + + +@pytest.mark.unit +def test_artifact_get_download_url_strips_trailing_slash_from_host(configured_api) -> None: + """Trailing slash on configuration.host must not produce a `//api/v1/...` URL.""" + configured_api.api_client.configuration.host = f"{_PLATFORM_HOST}/" + art = Artifact(configured_api, _RUN_ID, _ARTIFACT_ID) + response = _redirect_response(_PRESIGNED_URL) + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=response) as mock_get, + ): + art.get_download_url() + + assert mock_get.call_args.args[0] == f"{_PLATFORM_HOST}/api/v1/runs/{_RUN_ID}/artifacts/{_ARTIFACT_ID}/file" + + +@pytest.mark.unit +def test_artifact_get_download_url_redirect_without_location_raises(artifact) -> None: + """A 3xx response with no 
Location header is an SDK-level RuntimeError. + + Bypassing the codegen means we own the redirect contract; this asserts we + fail loudly instead of returning None/empty string. + """ + response = _redirect_response(location=None, status=HTTPStatus.TEMPORARY_REDIRECT) + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=response), + pytest.raises(RuntimeError, match="missing Location header"), + ): + artifact.get_download_url() + + +@pytest.mark.unit +def test_artifact_get_download_url_404_raises_not_found(artifact) -> None: + """404 from the /file endpoint maps to NotFoundException (codegen-style).""" + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=_error_response(HTTPStatus.NOT_FOUND)), + pytest.raises(NotFoundException), + ): + artifact.get_download_url() + + +@pytest.mark.unit +@pytest.mark.parametrize( + "client_status", + [HTTPStatus.FORBIDDEN, HTTPStatus.GONE, HTTPStatus.UNPROCESSABLE_ENTITY], +) +def test_artifact_get_download_url_4xx_raises_api_exception(artifact, client_status) -> None: + """4xx responses other than 404 surface as ApiException with the original status. + + Includes 410 because the API contract says deleted artifacts return 410 Gone. + """ + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=_error_response(client_status)), + pytest.raises(ApiException) as exc_info, + ): + artifact.get_download_url() + assert exc_info.value.status == client_status + + +@pytest.mark.unit +def test_artifact_get_download_url_unexpected_2xx_raises_runtime(artifact) -> None: + """A 200 (or other unexpected non-error, non-redirect) is RuntimeError. + + Per Dima's clarification on PR #478: the endpoint never returns 200 in + practice. If it ever does, we fail explicitly rather than silently passing + a body off to webbrowser.open(). 
+ """ + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=_error_response(HTTPStatus.OK)), + pytest.raises(RuntimeError, match="Unexpected status 200"), + ): + artifact.get_download_url() + + +@pytest.mark.unit +def test_artifact_get_download_url_5xx_retries_then_succeeds(artifact) -> None: + """A transient 5xx is retried; once it succeeds the presigned URL is returned.""" + error = _error_response(HTTPStatus.SERVICE_UNAVAILABLE) + success = _redirect_response(_PRESIGNED_URL) + + fake_settings = Mock() + fake_settings.run_retry_attempts = 3 + fake_settings.run_retry_wait_min = 0.0 + fake_settings.run_retry_wait_max = 0.0 + fake_settings.run_timeout = 5.0 + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_SETTINGS, return_value=fake_settings), + patch(_PATCH_REQUESTS_GET, side_effect=[error, success]) as mock_get, + ): + url = artifact.get_download_url() + + assert url == _PRESIGNED_URL + assert mock_get.call_count == 2 # one retry was needed + + +@pytest.mark.unit +def test_artifact_get_download_url_5xx_exhausts_retries_then_raises(artifact) -> None: + """If 5xx persists for all retry attempts, ServiceException is reraised.""" + fake_settings = Mock() + fake_settings.run_retry_attempts = 2 + fake_settings.run_retry_wait_min = 0.0 + fake_settings.run_retry_wait_max = 0.0 + fake_settings.run_timeout = 5.0 + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_SETTINGS, return_value=fake_settings), + patch( + _PATCH_REQUESTS_GET, + return_value=_error_response(HTTPStatus.SERVICE_UNAVAILABLE), + ) as mock_get, + pytest.raises(ServiceException), + ): + artifact.get_download_url() + assert mock_get.call_count == fake_settings.run_retry_attempts + + +@pytest.mark.unit +@pytest.mark.parametrize( + "exc_factory", + [ + lambda: requests.Timeout("timed out"), + lambda: requests.ConnectionError("dns failure"), + lambda: requests.RequestException("misc"), + ], +) +def 
test_artifact_get_download_url_network_errors_become_service_exception(artifact, exc_factory) -> None: + """`requests` exceptions are wrapped as ServiceException so retry can act on them. + + Without this wrapping the e2e tests in PR #507 hung — `requests.HTTPError` + escaped the retry loop and surfaced as a wrong exception type. + """ + fake_settings = Mock() + fake_settings.run_retry_attempts = 1 # don't waste test time on retries + fake_settings.run_retry_wait_min = 0.0 + fake_settings.run_retry_wait_max = 0.0 + fake_settings.run_timeout = 5.0 + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_SETTINGS, return_value=fake_settings), + patch(_PATCH_REQUESTS_GET, side_effect=exc_factory()), + pytest.raises(ServiceException), + ): + artifact.get_download_url() + + +@pytest.mark.unit +def test_artifact_get_download_url_honors_configuration_token_provider(configured_api) -> None: + """When configuration.token_provider is set, Artifact uses it instead of get_token. + + The codegen Client wires up token_provider to call ``get_token(use_cache=cache_token)``. + A user who instantiates ``Client(cache_token=False)`` does not want the SDK to + read/write the token file when the SDK resolves artifact URLs. This test pins + that contract — without it, Copilot's PR review caught us bypassing the user's + cache preference. + """ + custom_token_provider = Mock(return_value="cache-disabled-token") + configured_api.api_client.configuration.token_provider = custom_token_provider + art = Artifact(configured_api, _RUN_ID, _ARTIFACT_ID) + response = _redirect_response(_PRESIGNED_URL) + + with ( + # get_token is patched to a sentinel so an accidental fallback is caught: the + # Authorization assertion below fails if the header carries this value. 
+ patch(_PATCH_GET_TOKEN, return_value="WRONG-from-fallback"), + patch(_PATCH_REQUESTS_GET, return_value=response) as mock_get, + ): + art.get_download_url() + + custom_token_provider.assert_called_once_with() + assert mock_get.call_args.kwargs["headers"]["Authorization"] == "Bearer cache-disabled-token" + + +@pytest.mark.unit +def test_artifact_get_download_url_passes_proxy_and_ca_bundle(configured_api) -> None: + """Proxy and custom CA bundle from codegen Configuration are honored. + + Enterprise installs frequently set these via env; a previous draft of this + code ignored them, which would have broken downloads behind a proxy. + """ + proxy_url = "https://corp-proxy.local:3128" + configured_api.api_client.configuration.proxy = proxy_url + configured_api.api_client.configuration.ssl_ca_cert = "/etc/ssl/corp-ca.pem" + configured_api.api_client.configuration.verify_ssl = False + art = Artifact(configured_api, _RUN_ID, _ARTIFACT_ID) + response = _redirect_response(_PRESIGNED_URL) + + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=response) as mock_get, + ): + art.get_download_url() + + kwargs = mock_get.call_args.kwargs + assert kwargs["proxies"] == {"http": proxy_url, "https": proxy_url} + # CA bundle path takes precedence over verify_ssl=False + assert kwargs["verify"] == "/etc/ssl/corp-ca.pem" + + +@pytest.mark.unit +def test_run_get_artifact_download_url_delegates_to_artifact(app_run, configured_api) -> None: + """Run.get_artifact_download_url is the documented entry point and must just delegate. + + Keeping this thin protects callers from internal refactors of `Artifact`. 
+ """ + response = _redirect_response(_PRESIGNED_URL) + with ( + patch(_PATCH_GET_TOKEN, return_value="t"), + patch(_PATCH_REQUESTS_GET, return_value=response), + ): + # Replace mock_api on the existing Run with the configured one + app_run._api = configured_api + url = app_run.get_artifact_download_url(_ARTIFACT_ID) + assert url == _PRESIGNED_URL + + +@pytest.mark.unit +def test_run_artifact_returns_artifact_handle(app_run) -> None: + """Run.artifact() returns an Artifact bound to the right run/artifact pair.""" + handle = app_run.artifact(_ARTIFACT_ID) + assert isinstance(handle, Artifact) + assert handle.run_id == _RUN_ID + assert handle.artifact_id == _ARTIFACT_ID + + +# --------------------------------------------------------------------------- +# Run.ensure_artifacts_downloaded — instance method, AVAILABLE gating, fresh URL +# --------------------------------------------------------------------------- + +_PATCH_DOWNLOAD_FILE_RUNS = "aignostics.platform.resources.runs.download_file" +_PATCH_CALC_CRC32C = "aignostics.platform.resources.runs.calculate_file_crc32c" +_PATCH_MIME_TYPE_TO_FILE_ENDING = "aignostics.platform.resources.runs.mime_type_to_file_ending" + + +def _make_artifact_mock( + *, + name: str = "result", + output_artifact_id: str = "art-1", + output: object | None = None, + metadata: dict | None = None, +) -> MagicMock: + """Build a mock OutputArtifactResultReadResponse for ensure_artifacts_downloaded tests.""" + from aignx.codegen.models import ArtifactOutput as _ArtifactOutput + + a = MagicMock() + a.name = name + a.output_artifact_id = output_artifact_id + a.output = output if output is not None else _ArtifactOutput.AVAILABLE + a.metadata = metadata if metadata is not None else {"checksum_base64_crc32c": "AAAA", "media_type": "text/csv"} + return a + + +def _make_item_mock(external_id: str = "slide-1", artifacts: list | None = None) -> MagicMock: + item = MagicMock() + item.external_id = external_id + item.output_artifacts = artifacts if 
artifacts is not None else [_make_artifact_mock()] + return item + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_resolves_fresh_url_per_artifact(app_run, tmp_path) -> None: + """ensure_artifacts_downloaded must call get_artifact_download_url for AVAILABLE artifacts. + + The deprecated artifact.download_url field is no longer consulted; URL is + resolved fresh per artifact via the /file endpoint right before downloading. + """ + item = _make_item_mock(artifacts=[_make_artifact_mock(output_artifact_id="art-xyz")]) + app_run.get_artifact_download_url = Mock(return_value=_PRESIGNED_URL) + + with patch(_PATCH_DOWNLOAD_FILE_RUNS) as mock_download: + app_run.ensure_artifacts_downloaded(tmp_path, item, print_status=False) + + app_run.get_artifact_download_url.assert_called_once_with("art-xyz") + mock_download.assert_called_once() + call_args = mock_download.call_args.args + assert call_args[0] == _PRESIGNED_URL # First arg is the freshly resolved URL + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_skips_non_available_artifacts(app_run, tmp_path) -> None: + """Non-AVAILABLE artifacts must be skipped — the /file endpoint won't return URLs for them. + + Per Dima on PR #478: the /file endpoint does not return a presigned URL for + artifacts whose output is NONE (e.g. deleted, never produced). Calling it + would fail the entire download. This test pins the AVAILABLE-gating. 
+ """ + from aignx.codegen.models import ArtifactOutput as _ArtifactOutput + + none_artifact = _make_artifact_mock(output_artifact_id="art-none", output=_ArtifactOutput.NONE) + item = _make_item_mock(artifacts=[none_artifact]) + app_run.get_artifact_download_url = Mock() + + with patch(_PATCH_DOWNLOAD_FILE_RUNS) as mock_download: + app_run.ensure_artifacts_downloaded(tmp_path, item, print_status=False) + + app_run.get_artifact_download_url.assert_not_called() + mock_download.assert_not_called() + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_skips_existing_file_with_matching_checksum(app_run, tmp_path) -> None: + """If a local file already matches the metadata checksum, skip the URL fetch and the download. + + Critical: do NOT call get_artifact_download_url when we wouldn't have downloaded + anyway. Resolving a presigned URL hits SAMIA; skipping it on resume saves + backend load and shortens resume cycles. + """ + item_dir = tmp_path / "slide-1" + item_dir.mkdir() + artifact_path = item_dir / "result.csv" + artifact_path.write_bytes(b"hello") + item = _make_item_mock(artifacts=[_make_artifact_mock()]) + app_run.get_artifact_download_url = Mock() + + with ( + patch(_PATCH_CALC_CRC32C, return_value="AAAA"), # Matches metadata checksum + patch(_PATCH_MIME_TYPE_TO_FILE_ENDING, return_value=".csv"), + patch(_PATCH_DOWNLOAD_FILE_RUNS) as mock_download, + ): + app_run.ensure_artifacts_downloaded(tmp_path, item, print_status=False) + + app_run.get_artifact_download_url.assert_not_called() + mock_download.assert_not_called() + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_resumes_when_local_checksum_mismatches(app_run, tmp_path) -> None: + """Local file exists but checksum mismatches → fresh URL fetched and download retried.""" + item_dir = tmp_path / "slide-1" + item_dir.mkdir() + artifact_path = item_dir / "result.csv" + artifact_path.write_bytes(b"corrupted") + item = _make_item_mock(artifacts=[_make_artifact_mock(output_artifact_id="art-xyz")]) 
+ app_run.get_artifact_download_url = Mock(return_value=_PRESIGNED_URL) + + with ( + patch(_PATCH_CALC_CRC32C, return_value="ZZZZ"), # Mismatch with metadata "AAAA" + patch(_PATCH_MIME_TYPE_TO_FILE_ENDING, return_value=".csv"), + patch(_PATCH_DOWNLOAD_FILE_RUNS) as mock_download, + ): + app_run.ensure_artifacts_downloaded(tmp_path, item, print_status=False) + + app_run.get_artifact_download_url.assert_called_once_with("art-xyz") + mock_download.assert_called_once() + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_skips_artifact_with_no_metadata(app_run, tmp_path) -> None: + """Artifact with empty metadata dict is skipped (no checksum to verify against). + + Per Copilot PR review on #598, the metadata check now runs *before* the + MIME lookup, so this test no longer needs to mock through the MIME helpers. + Without the reorder, an empty-metadata artifact would raise ``ValueError`` + from ``mime_type_to_file_ending`` before the early-return could fire. + """ + item = _make_item_mock(artifacts=[_make_artifact_mock(metadata={})]) + app_run.get_artifact_download_url = Mock() + + with patch(_PATCH_DOWNLOAD_FILE_RUNS) as mock_download: + app_run.ensure_artifacts_downloaded(tmp_path, item, print_status=False) + + app_run.get_artifact_download_url.assert_not_called() + mock_download.assert_not_called() + + +@pytest.mark.unit +def test_download_to_folder_post_termination_loop_filters_by_item_state(app_run, mock_api, tmp_path) -> None: + """Post-termination loop must filter items by state==TERMINATED and output==FULL. + + Regression guard for the latent enum-truthiness bug Sentry flagged on PR #478: + the original code had ``if ItemOutput.FULL:`` (always truthy because it tests the + enum member itself instead of comparing the item's output to it), which caused + ensure_artifacts_downloaded to be called for *every* item regardless of state + or output. This test pins that we only download items that are actually + terminated with full output. 
+ """ + from aignx.codegen.models import ItemOutput, ItemState, RunState + + terminated_full_item = MagicMock(state=ItemState.TERMINATED, output=ItemOutput.FULL, external_id="ok") + terminated_none_item = MagicMock(state=ItemState.TERMINATED, output=ItemOutput.NONE, external_id="empty") + pending_item = MagicMock(state=ItemState.PROCESSING, output=ItemOutput.NONE, external_id="pending") + + # Run is already TERMINATED on first details() call → skip the wait loop, go to post-loop. + terminated_run_state = MagicMock(state=RunState.TERMINATED) + app_run.details = Mock(return_value=terminated_run_state) + app_run.results = Mock(return_value=[terminated_full_item, terminated_none_item, pending_item]) + app_run.ensure_artifacts_downloaded = Mock() + + app_run.download_to_folder(tmp_path, print_status=False) + + # Only the TERMINATED+FULL item should trigger ensure_artifacts_downloaded. + # If the latent enum bug were back, all 3 items would trigger it. + assert app_run.ensure_artifacts_downloaded.call_count == 1 + forwarded_item = app_run.ensure_artifacts_downloaded.call_args.args[1] + assert forwarded_item.external_id == "ok" + + +@pytest.mark.unit +def test_ensure_artifacts_downloaded_is_instance_method_not_static(app_run, tmp_path) -> None: + """Regression guard: ensure_artifacts_downloaded must be an instance method. + + PR #478 left it as @staticmethod, which made `self.get_artifact_download_url()` + inside the loop impossible. Pinning the bound-method shape so a future refactor + cannot accidentally revert. + """ + bound = app_run.ensure_artifacts_downloaded + # On a method, __self__ is the instance; on a staticmethod, __self__ doesn't exist. 
+ assert getattr(bound, "__self__", None) is app_run diff --git a/tests/aignostics/platform/settings_test.py b/tests/aignostics/platform/settings_test.py index e34e9ee44..9a20ad347 100644 --- a/tests/aignostics/platform/settings_test.py +++ b/tests/aignostics/platform/settings_test.py @@ -37,6 +37,10 @@ REDIRECT_URI_PRODUCTION, REDIRECT_URI_STAGING, REDIRECT_URI_TEST, + STATUS_PAGE_URL_DEV, + STATUS_PAGE_URL_PRODUCTION, + STATUS_PAGE_URL_STAGING, + STATUS_PAGE_URL_TEST, TOKEN_URL_DEV, TOKEN_URL_PRODUCTION, TOKEN_URL_STAGING, @@ -96,6 +100,8 @@ def test_authentication_settings_production(record_property) -> None: assert settings.redirect_uri == REDIRECT_URI_PRODUCTION assert settings.device_url == DEVICE_URL_PRODUCTION assert settings.jws_json_url == JWS_JSON_URL_PRODUCTION + assert settings.status_page_url == STATUS_PAGE_URL_PRODUCTION + assert settings.status_page_url == "https://status.platform.aignostics.com" @pytest.mark.unit @@ -116,6 +122,8 @@ def test_authentication_settings_staging(record_property, mock_env_vars) -> None assert settings.redirect_uri == REDIRECT_URI_STAGING assert settings.device_url == DEVICE_URL_STAGING assert settings.jws_json_url == JWS_JSON_URL_STAGING + assert settings.status_page_url == STATUS_PAGE_URL_STAGING + assert settings.status_page_url == "https://status.platform-staging.aignostics.com" @pytest.mark.unit @@ -136,6 +144,8 @@ def test_authentication_settings_dev(record_property, mock_env_vars) -> None: assert settings.redirect_uri == REDIRECT_URI_DEV assert settings.device_url == DEVICE_URL_DEV assert settings.jws_json_url == JWS_JSON_URL_DEV + assert settings.status_page_url is None + assert STATUS_PAGE_URL_DEV is None @pytest.mark.unit @@ -156,6 +166,8 @@ def test_authentication_settings_test(record_property, mock_env_vars) -> None: assert settings.redirect_uri == REDIRECT_URI_TEST assert settings.device_url == DEVICE_URL_TEST assert settings.jws_json_url == JWS_JSON_URL_TEST + assert settings.status_page_url is None + assert 
STATUS_PAGE_URL_TEST is None @pytest.mark.unit @@ -528,3 +540,110 @@ def test_validate_retry_wait_times_min_greater_than_max(mock_env_vars) -> None: auth_retry_wait_min=10.0, auth_retry_wait_max=5.0, ) + + +@pytest.mark.unit +def test_status_page_url_env_override_takes_precedence(record_property, monkeypatch) -> None: + """User-supplied AIGNOSTICS_STATUS_PAGE_URL overrides the per-environment default.""" + record_property("tested-item-id", "SPEC-PLATFORM-SERVICE") + custom_url = "https://custom-status.example.com" + monkeypatch.setenv(f"{__project_name__.upper()}_STATUS_PAGE_URL", custom_url) + + settings = Settings(api_root=API_ROOT_PRODUCTION) + + # Env-supplied value wins over the production default. + assert settings.status_page_url == custom_url + assert settings.status_page_url != STATUS_PAGE_URL_PRODUCTION + + +@pytest.mark.unit +def test_status_page_url_explicit_argument_overrides_default(record_property) -> None: + """An explicit status_page_url argument wins over the per-environment default.""" + record_property("tested-item-id", "SPEC-PLATFORM-SERVICE") + custom_url = "https://my-org-status.example.com" + + settings = Settings(api_root=API_ROOT_STAGING, status_page_url=custom_url) + + assert settings.status_page_url == custom_url + + +@pytest.mark.unit +def test_status_page_url_empty_string_coerced_to_none(record_property, monkeypatch) -> None: + """Empty `AIGNOSTICS_STATUS_PAGE_URL` env var is coerced to None. + + Some shells / .env loaders represent "variable set but empty" as an empty string. Treating + that as a valid status page URL would render a broken iframe; coercing to None matches the + dev/test default behaviour (no badge, no menu link). 
+ """ + record_property("tested-item-id", "SPEC-PLATFORM-SERVICE") + monkeypatch.setenv(f"{__project_name__.upper()}_STATUS_PAGE_URL", "") + + settings = Settings(api_root=API_ROOT_PRODUCTION) + + assert settings.status_page_url is None + + +@pytest.mark.unit +def test_status_page_url_explicit_none_argument(record_property) -> None: + """An explicit None argument is preserved (i.e. the env-var default doesn't kick in).""" + record_property("tested-item-id", "SPEC-PLATFORM-SERVICE") + + settings = Settings(api_root=API_ROOT_STAGING, status_page_url=None) + + assert settings.status_page_url is None + + +@pytest.mark.unit +@pytest.mark.parametrize( + "bad_url", + [ + "ftp://example.com", # non-http(s) scheme # NOSONAR S5332: literal is the invalid input we assert is rejected + "javascript:alert(1)", # javascript scheme + "file:///etc/passwd", # file scheme + "//example.com", # missing scheme + "example.com", # missing scheme + netloc + 'https://example.com" onload="alert(1)', # double-quote breakout attempt + "https://example.com' onload='alert(1)", # single-quote breakout attempt + "https://example.com