diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b16d002f0..ec41226185 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -871,6 +871,37 @@ jobs: - name: Run tests run: cd actions/setup/js && npm test + js-qmd-index: + runs-on: ubuntu-latest + timeout-minutes: 10 + needs: validate-yaml + permissions: + contents: read + concurrency: + group: ci-${{ github.ref }}-js-qmd-index + cancel-in-progress: true + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Set up Node.js + id: setup-node + uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6 + with: + node-version: "24" + cache: npm + cache-dependency-path: actions/setup/js/package-lock.json + - name: Report Node cache status + run: | + if [ "${{ steps.setup-node.outputs.cache-hit }}" == "true" ]; then + echo "✅ Node cache hit" >> $GITHUB_STEP_SUMMARY + else + echo "⚠️ Node cache miss" >> $GITHUB_STEP_SUMMARY + fi + - name: Install npm dependencies + run: cd actions/setup/js && npm ci + - name: Run qmd_index.cjs tests + run: cd actions/setup/js && npm test -- qmd_index.test.cjs + js-integration-live-api: runs-on: ubuntu-latest timeout-minutes: 10 diff --git a/.github/workflows/dev.lock.yml b/.github/workflows/dev.lock.yml index 47335947ca..816716f6d9 100644 --- a/.github/workflows/dev.lock.yml +++ b/.github/workflows/dev.lock.yml @@ -22,29 +22,54 @@ # # Daily status report for gh-aw project # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"8c8abae2e173ed0fcbd79e5003187cf9b17e04ae7fd24f874ccbd71611af6387","agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"992b8a4df813dfa50732c5bbc1759c89ffb2eb7b5e2c569f214c17011a56970c","agent_id":"copilot"} name: "Dev" "on": + discussion: + types: + - labeled + issues: + types: + - labeled + pull_request: + types: + - labeled schedule: - cron: "0 9 * * *" workflow_dispatch: + inputs: + item_number: + default: "" + 
description: The number of the issue, pull request, or discussion + required: false + type: string permissions: {} concurrency: - group: "gh-aw-${{ github.workflow }}" + group: "gh-aw-${{ github.workflow }}-${{ github.event.issue.number || github.event.pull_request.number || github.run_id }}" run-name: "Dev" jobs: activation: + needs: pre_activation + if: > + needs.pre_activation.outputs.activated == 'true' && ((github.event_name == 'issues' || github.event_name == 'pull_request' || + github.event_name == 'discussion') && github.event.label.name == 'dev' || (!(github.event_name == 'issues')) && + (!(github.event_name == 'pull_request')) && (!(github.event_name == 'discussion'))) runs-on: ubuntu-slim permissions: contents: read + discussions: write + issues: write + pull-requests: write outputs: - comment_id: "" - comment_repo: "" + comment_id: ${{ steps.add-comment.outputs.comment-id }} + comment_repo: ${{ steps.add-comment.outputs.comment-repo }} + comment_url: ${{ steps.add-comment.outputs.comment-url }} + label_command: ${{ steps.remove_trigger_label.outputs.label_name }} lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} model: ${{ steps.generate_aw_info.outputs.model }} steps: @@ -84,6 +109,19 @@ jobs: setupGlobals(core, github, context, exec, io); const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs'); await main(core, context); + - name: Add eyes reaction for immediate feedback + id: react + if: github.event_name == 'issues' || github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment' || github.event_name == 'discussion' || github.event_name == 'discussion_comment' || github.event_name == 'pull_request' && github.event.pull_request.head.repo.id == github.repository_id + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_REACTION: "eyes" + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { 
setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/add_reaction.cjs'); + await main(); - name: Checkout .github and .agents folders uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -103,6 +141,29 @@ jobs: setupGlobals(core, github, context, exec, io); const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); await main(); + - name: Add comment with workflow run link + id: add-comment + if: github.event_name == 'issues' || github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment' || github.event_name == 'discussion' || github.event_name == 'discussion_comment' || github.event_name == 'pull_request' && github.event.pull_request.head.repo.id == github.repository_id + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_NAME: "Dev" + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/add_workflow_run_comment.cjs'); + await main(); + - name: Remove trigger label + id: remove_trigger_label + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_LABEL_NAMES: '["dev"]' + with: + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/remove_trigger_label.cjs'); + await main(); - name: Create prompt with built-in context env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt @@ -124,6 +185,7 @@ jobs: cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" + 
cat "${RUNNER_TEMP}/gh-aw/prompts/qmd_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" cat << 'GH_AW_PROMPT_EOF' @@ -188,6 +250,7 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: ${{ needs.pre_activation.outputs.activated }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -206,7 +269,8 @@ jobs: GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, - GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE, + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: process.env.GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED } }); - name: Validate prompt placeholders @@ -228,15 +292,15 @@ jobs: retention-days: 1 agent: - needs: activation + needs: + - activation + - indexing runs-on: ubuntu-latest permissions: contents: read copilot-requests: write issues: read pull-requests: read - concurrency: - group: "gh-aw-copilot-${{ github.workflow }}" env: DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} GH_AW_ASSETS_ALLOWED_EXTS: "" @@ -312,6 +376,16 @@ jobs: GH_HOST: github.com - name: Install AWF binary run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.24.5 + - name: Restore qmd index from cache + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + - name: Restore qmd models cache + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/qmd/models/ + key: qmd-models-2.0.1-${{ runner.os }} - name: Determine automatic lockdown mode for GitHub MCP Server id: 
determine-automatic-lockdown uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -476,6 +550,41 @@ jobs: bash ${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh + - name: Setup Node.js for qmd MCP server + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: "24" + - name: Start QMD MCP Server + id: qmd-mcp-start + env: + INDEX_PATH: /tmp/gh-aw/qmd-index/index.sqlite + NO_COLOR: '1' + NODE_LLAMA_CPP_GPU: 'false' + run: | + # Start qmd MCP server natively in HTTP mode. + # qmd must run on the host VM (not in Docker) because node-llama-cpp + # requires platform-native binaries that cannot run in a generic container. + # HTTP transport keeps MCP traffic on TCP, fully separate from stdout. + npx --yes --package @tobilu/qmd@2.0.1 qmd mcp --http --port 8181 \ + >> /tmp/qmd-mcp.log 2>&1 & + # Save PID for logs; the GitHub Actions runner terminates all processes at job end. + echo $! > /tmp/qmd-mcp.pid + + # Wait up to 120 s for the server to accept requests + echo 'Waiting for QMD MCP server on port 8181...' 
+ for i in $(seq 1 60); do + if curl -sf http://localhost:8181/health > /dev/null 2>&1; then + echo 'QMD MCP server is ready' + break + fi + if [ "$i" -eq 60 ]; then + echo 'ERROR: QMD MCP server failed to start within 120 s' >&2 + cat /tmp/qmd-mcp.log 2>&1 || true + exit 1 + fi + sleep 2 + done + - name: Start MCP Gateway id: start-mcp-gateway env: @@ -523,6 +632,17 @@ jobs: } } }, + "qmd": { + "type": "http", + "url": "http://host.docker.internal:8181/mcp", + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + }, "safeoutputs": { "type": "http", "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", @@ -954,6 +1074,131 @@ jobs: setupGlobals(core, github, context, exec, io); const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); await main(); + - name: Update reaction comment with completion status + id: conclusion + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_COMMENT_ID: ${{ needs.activation.outputs.comment_id }} + GH_AW_COMMENT_REPO: ${{ needs.activation.outputs.comment_repo }} + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_WORKFLOW_NAME: "Dev" + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_DETECTION_CONCLUSION: ${{ needs.agent.outputs.detection_conclusion }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/notify_comment_error.cjs'); + await main(); + + indexing: + needs: activation + runs-on: aw-gpu-runner-T4 + permissions: + contents: read + timeout-minutes: 60 + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + 
repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Checkout repository for qmd indexing + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Restore qmd index from cache + id: qmd-cache-restore + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + restore-keys: | + gh-aw-qmd-2.0.1- + - name: Cache qmd models + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/qmd/models/ + key: qmd-models-2.0.1-${{ runner.os }} + - name: Cache node-llama-cpp binaries + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/node-llama-cpp/ + key: node-llama-cpp-2.0.1-${{ runner.os }}-${{ runner.arch }}-${{ runner.imageid }} + - name: Setup Node.js for qmd + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: "24" + - name: Install @tobilu/qmd SDK + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + run: | + npm install --prefix "${{ runner.temp }}/gh-aw/actions" @tobilu/qmd@2.0.1 @actions/github + - name: Build qmd index + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + QMD_CONFIG_JSON: | + {"dbPath":"/tmp/gh-aw/qmd-index","checkouts":[{"name":"docs","path":"${GITHUB_WORKSPACE}","patterns":["docs/src/**/*.md","docs/src/**/*.mdx"],"context":"gh-aw project documentation"}],"searches":[{"name":"issues","type":"issues","max":500,"tokenEnvVar":"QMD_SEARCH_TOKEN_0"}]} + NODE_LLAMA_CPP_GPU: "false" + QMD_SEARCH_TOKEN_0: ${{ secrets.GITHUB_TOKEN }} + with: + github-token: ${{ 
github.token }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/qmd_index.cjs'); + await main(); + - name: Save qmd index to cache + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + + pre_activation: + if: > + (github.event_name == 'issues' || github.event_name == 'pull_request' || github.event_name == 'discussion') && + github.event.label.name == 'dev' || (!(github.event_name == 'issues')) && (!(github.event_name == 'pull_request')) && + (!(github.event_name == 'discussion')) + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + activated: ${{ steps.check_membership.outputs.is_team_member == 'true' }} + matched_command: '' + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Check team membership for workflow + id: check_membership + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_REQUIRED_ROLES: admin,maintainer,write + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/check_membership.cjs'); + await main(); safe_outputs: needs: agent diff --git a/.github/workflows/dev.md b/.github/workflows/dev.md index bec7d5072f..02811962e9 100644 --- a/.github/workflows/dev.md +++ b/.github/workflows/dev.md @@ -1,6 +1,7 
@@ --- on: workflow_dispatch: + label_command: dev schedule: - cron: '0 9 * * *' # Daily at 9 AM UTC name: Dev @@ -14,6 +15,20 @@ permissions: issues: read pull-requests: read +tools: + qmd: + checkouts: + - name: docs + paths: + - docs/src/**/*.md + - docs/src/**/*.mdx + context: "gh-aw project documentation" + searches: + - name: issues + type: issues + max: 500 + github-token: ${{ secrets.GITHUB_TOKEN }} + safe-outputs: create-issue: expires: 7d @@ -24,14 +39,21 @@ features: # Daily Status Report -Generate a daily status report for the gh-aw project. +Generate a daily status report for the gh-aw project, focusing on documentation quality. **Requirements:** -1. Analyze the current state of the repository -2. Check for recent commits, pull requests, and issues -3. Identify any potential issues or areas needing attention -4. Create a comprehensive daily status report -5. Post the report as an issue with the date in the title + +1. **Find documentation problems reported in issues**: Use the `qmd` search tool to query the indexed issues collection for issues that mention documentation bugs, unclear instructions, missing documentation, or incorrect documentation. Look for patterns like "docs", "documentation", "unclear", "wrong", "missing", "broken", "outdated". + +2. **Cross-reference with current documentation**: For each documentation problem found in issues, use the `qmd` search tool to query the indexed docs collection to find the relevant documentation section that the issue is referencing or that could answer the question raised. + +3. **Compile a report** summarizing: + - Issues that report documentation problems (with issue numbers and titles) + - The corresponding documentation sections that may need updating + - Any issues where the documentation actually already contains the answer (and the issue could be closed with a pointer) + - Gaps where no documentation exists for a reported problem + +4. Post the report as an issue with the date in the title. 
Keep the report informative but concise. diff --git a/.github/workflows/smoke-codex.lock.yml b/.github/workflows/smoke-codex.lock.yml index 999168c82b..a539d11bf5 100644 --- a/.github/workflows/smoke-codex.lock.yml +++ b/.github/workflows/smoke-codex.lock.yml @@ -27,7 +27,7 @@ # - shared/gh.md # - shared/reporting.md # -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"8ccab7e12d1831d3a6e67bf289dbda8640b9254de4657fc2b2d8cfc3f33fcfb9","agent_id":"codex"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"ee94928302e2744993f1609697de1da08b593abd93e7f3a190d4c0148d049ff3","agent_id":"codex"} name: "Smoke Codex" "on": @@ -190,6 +190,7 @@ jobs: cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/playwright_prompt.md" + cat "${RUNNER_TEMP}/gh-aw/prompts/qmd_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/cache_memory_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" cat << 'GH_AW_PROMPT_EOF' @@ -319,7 +320,9 @@ jobs: retention-days: 1 agent: - needs: activation + needs: + - activation + - indexing runs-on: ubuntu-latest permissions: contents: read @@ -420,6 +423,16 @@ jobs: run: npm install -g @openai/codex@latest - name: Install AWF binary run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.24.5 + - name: Restore qmd index from cache + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + - name: Restore qmd models cache + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/qmd/models/ + key: qmd-models-2.0.1-${{ runner.os }} - name: Determine automatic lockdown mode for GitHub MCP Server id: determine-automatic-lockdown uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -813,6 +826,41 @@ jobs: bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_scripts_server.sh 
+ - name: Setup Node.js for qmd MCP server + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: "24" + - name: Start QMD MCP Server + id: qmd-mcp-start + env: + INDEX_PATH: /tmp/gh-aw/qmd-index/index.sqlite + NO_COLOR: '1' + NODE_LLAMA_CPP_GPU: 'false' + run: | + # Start qmd MCP server natively in HTTP mode. + # qmd must run on the host VM (not in Docker) because node-llama-cpp + # requires platform-native binaries that cannot run in a generic container. + # HTTP transport keeps MCP traffic on TCP, fully separate from stdout. + npx --yes --package @tobilu/qmd@2.0.1 qmd mcp --http --port 8181 \ + >> /tmp/qmd-mcp.log 2>&1 & + # Save PID for logs; the GitHub Actions runner terminates all processes at job end. + echo $! > /tmp/qmd-mcp.pid + + # Wait up to 120 s for the server to accept requests + echo 'Waiting for QMD MCP server on port 8181...' + for i in $(seq 1 60); do + if curl -sf http://localhost:8181/health > /dev/null 2>&1; then + echo 'QMD MCP server is ready' + break + fi + if [ "$i" -eq 60 ]; then + echo 'ERROR: QMD MCP server failed to start within 120 s' >&2 + cat /tmp/qmd-mcp.log 2>&1 || true + exit 1 + fi + sleep 2 + done + - name: Start MCP Gateway id: start-mcp-gateway env: @@ -892,6 +940,15 @@ jobs: [mcp_servers.playwright."guard-policies".write-sink] accept = ["*"] + [mcp_servers.qmd] + type = "http" + url = "http://host.docker.internal:8181/mcp" + + [mcp_servers.qmd."guard-policies"] + + [mcp_servers.qmd."guard-policies".write-sink] + accept = ["*"] + [mcp_servers.safeoutputs] type = "http" url = "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT" @@ -991,6 +1048,17 @@ jobs: } } }, + "qmd": { + "type": "http", + "url": "http://host.docker.internal:8181/mcp", + "guard-policies": { + "write-sink": { + "accept": [ + "*" + ] + } + } + }, "safeoutputs": { "type": "http", "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", @@ -1454,6 +1522,77 @@ jobs: const { main } = require('${{ runner.temp 
}}/gh-aw/actions/notify_comment_error.cjs'); await main(); + indexing: + needs: activation + runs-on: aw-gpu-runner-T4 + permissions: + contents: read + timeout-minutes: 60 + steps: + - name: Checkout actions folder + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + repository: github/gh-aw + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: ${{ runner.temp }}/gh-aw/actions + - name: Checkout repository for qmd indexing + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Restore qmd index from cache + id: qmd-cache-restore + uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + restore-keys: | + gh-aw-qmd-2.0.1- + - name: Cache qmd models + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/qmd/models/ + key: qmd-models-2.0.1-${{ runner.os }} + - name: Cache node-llama-cpp binaries + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.cache/node-llama-cpp/ + key: node-llama-cpp-2.0.1-${{ runner.os }}-${{ runner.arch }}-${{ runner.imageid }} + - name: Setup Node.js for qmd + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: "24" + - name: Install @tobilu/qmd SDK + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + run: | + npm install --prefix "${{ runner.temp }}/gh-aw/actions" @tobilu/qmd@2.0.1 @actions/github + - name: Build qmd index + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + QMD_CONFIG_JSON: | + 
{"dbPath":"/tmp/gh-aw/qmd-index","checkouts":[{"name":"docs","path":"${GITHUB_WORKSPACE}","patterns":["docs/src/**/*.md","docs/src/**/*.mdx"],"context":"gh-aw project documentation"}],"searches":[{"name":"issues","type":"issues","max":500,"tokenEnvVar":"QMD_SEARCH_TOKEN_0"}]} + NODE_LLAMA_CPP_GPU: "false" + QMD_SEARCH_TOKEN_0: ${{ secrets.GITHUB_TOKEN }} + with: + github-token: ${{ github.token }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('${{ runner.temp }}/gh-aw/actions/qmd_index.cjs'); + await main(); + - name: Save qmd index to cache + if: steps.qmd-cache-restore.outputs.cache-hit != 'true' + uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + key: gh-aw-qmd-2.0.1-${{ github.run_id }} + path: /tmp/gh-aw/qmd-index/ + pre_activation: if: > (github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id) && diff --git a/.github/workflows/smoke-codex.md b/.github/workflows/smoke-codex.md index 49ce9c71e9..12676ee80c 100644 --- a/.github/workflows/smoke-codex.md +++ b/.github/workflows/smoke-codex.md @@ -34,6 +34,18 @@ tools: languages: go: {} web-fetch: + qmd: + checkouts: + - name: docs + paths: + - docs/src/**/*.md + - docs/src/**/*.mdx + context: "gh-aw project documentation" + searches: + - name: issues + type: issues + max: 500 + github-token: ${{ secrets.GITHUB_TOKEN }} runtimes: go: version: "1.25" diff --git a/actions/setup/js/qmd_index.cjs b/actions/setup/js/qmd_index.cjs new file mode 100644 index 0000000000..02453c8209 --- /dev/null +++ b/actions/setup/js/qmd_index.cjs @@ -0,0 +1,264 @@ +// @ts-check +/// +"use strict"; + +const fs = require("fs"); +const path = require("path"); +const { pathToFileURL } = require("url"); + +/** + * @typedef {{ name: string, path: string, patterns?: string[], context?: string }} QmdCheckout + * @typedef {{ name?: 
string, type?: string, query?: string, repo?: string, min?: number, max?: number, tokenEnvVar?: string }} QmdSearch + * @typedef {{ dbPath: string, checkouts?: QmdCheckout[], searches?: QmdSearch[] }} QmdConfig + */ + +/** + * Resolves `${ENV_VAR}` placeholders in a path string using the current process environment. + * @param {string} p + * @returns {string} + */ +function resolveEnvVars(p) { + return p.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] || ""); +} + +/** + * Returns an Octokit client for the given token env var, or the default github client. + * @param {string | undefined} tokenEnvVar + * @returns {Promise} + */ +async function getClient(tokenEnvVar) { + if (tokenEnvVar && process.env[tokenEnvVar]) { + const { getOctokit } = await import("@actions/github"); + return getOctokit(process.env[tokenEnvVar]); + } + return github; +} + +/** + * Writes the step summary to $GITHUB_STEP_SUMMARY via core.summary. + * @param {QmdConfig} config + * @param {{ indexed: number, updated: number, unchanged: number, removed: number } | null} updateResult + * @param {{ embedded: number } | null} embedResult + */ +async function writeSummary(config, updateResult, embedResult) { + try { + let md = "## qmd documentation index\n\n"; + + if ((config.checkouts || []).length > 0) { + md += "### Collections\n\n"; + md += "| Name | Patterns | Context |\n"; + md += "| --- | --- | --- |\n"; + for (const col of config.checkouts) { + const patterns = (col.patterns || ["**/*.md"]).join(", "); + const ctx = col.context || "-"; + md += `| ${col.name} | ${patterns} | ${ctx} |\n`; + } + md += "\n"; + } + + if ((config.searches || []).length > 0) { + md += "### Searches\n\n"; + md += "| Name | Type | Query / Repo | Min | Max |\n"; + md += "| --- | --- | --- | --- | --- |\n"; + for (const s of config.searches) { + const name = s.name || "-"; + const type = s.type || "code"; + const ref = (s.query || s.repo || "-").replace(/\|/g, "\\|"); + const min = s.min > 0 ? 
String(s.min) : "-"; + const max = String(s.max > 0 ? s.max : type === "issues" ? 500 : 30); + md += `| ${name} | ${type} | ${ref} | ${min} | ${max} |\n`; + } + md += "\n"; + } + + if (updateResult) { + md += "### Index stats\n\n"; + md += "| Stat | Value |\n"; + md += "| --- | --- |\n"; + md += `| Indexed | ${updateResult.indexed} |\n`; + md += `| Updated | ${updateResult.updated} |\n`; + md += `| Unchanged | ${updateResult.unchanged} |\n`; + md += `| Removed | ${updateResult.removed} |\n`; + if (embedResult) { + md += `| Embedded | ${embedResult.embedded} |\n`; + } + } + + await core.summary.addRaw(md).write(); + } catch (/** @type {any} */ err) { + core.warning(`Could not write step summary: ${err.message}`); + } +} + +/** + * Main entry point for building the qmd documentation index. + * + * Reads the JSON config from the QMD_CONFIG_JSON environment variable, uses the + * @tobilu/qmd JavaScript SDK to create a vector-search store, registers all + * configured collections (from checkouts and GitHub searches), then calls + * store.update() and store.embed() to index the files and save the collection. + * + * Called from an actions/github-script step via: + * const { main } = require('/tmp/gh-aw/actions/qmd_index.cjs'); + * await main(); + */ +async function main() { + const configJson = process.env.QMD_CONFIG_JSON; + if (!configJson) { + core.setFailed("QMD_CONFIG_JSON environment variable not set"); + return; + } + + /** @type {QmdConfig} */ + const config = JSON.parse(configJson); + + // Load @tobilu/qmd SDK (ESM-only package) via dynamic import. + // The package is installed into the gh-aw actions directory by a prior npm-install step. + const qmdIndexPath = path.join(__dirname, "node_modules", "@tobilu", "qmd", "dist", "index.js"); + if (!fs.existsSync(qmdIndexPath)) { + core.setFailed(`@tobilu/qmd not found at ${qmdIndexPath}. 
The 'Install @tobilu/qmd SDK' step must run first.`); + return; + } + + const { createStore } = /** @type {any} */ await import(pathToFileURL(qmdIndexPath).href); + + // Ensure the index directory exists. + fs.mkdirSync(config.dbPath, { recursive: true }); + const dbPath = path.join(config.dbPath, "index.sqlite"); + + // ── Build collections config from checkout entries ────────────────────── + /** @type {Record }>} */ + const collections = {}; + + for (const checkout of config.checkouts || []) { + const resolvedPath = resolveEnvVars(checkout.path); + const pattern = (checkout.patterns || ["**/*.md"]).join(","); + collections[checkout.name] = { + path: resolvedPath, + pattern, + ...(checkout.context ? { context: { "/": checkout.context } } : {}), + }; + } + + // ── Process search entries ─────────────────────────────────────────────── + for (let i = 0; i < (config.searches || []).length; i++) { + const search = config.searches[i]; + const collectionName = search.name || `search-${i}`; + const searchDir = `/tmp/gh-aw/qmd-search-${i}`; + fs.mkdirSync(searchDir, { recursive: true }); + + const client = await getClient(search.tokenEnvVar); + + if (search.type === "issues") { + const repoSlug = search.repo || process.env.GITHUB_REPOSITORY || ""; + const slugParts = repoSlug.split("/"); + if (slugParts.length < 2 || !slugParts[0] || !slugParts[1]) { + core.setFailed(`qmd search "${collectionName}": invalid repository slug "${repoSlug}" (expected "owner/repo")`); + return; + } + const [owner, repo] = slugParts; + const maxCount = search.max > 0 ? search.max : 500; + + core.info(`Fetching issues from ${repoSlug} (max: ${maxCount})…`); + + // Paginate until we have accumulated enough issues across all pages. 
+ let accumulated = 0; + const issues = await client.paginate(client.rest.issues.listForRepo, { owner, repo, state: "open", per_page: 100 }, (/** @type {{ data: any[] }} */ response, done) => { + accumulated += response.data.length; + if (accumulated >= maxCount) done(); + return response.data; + }); + + const slice = issues.slice(0, maxCount); + for (const issue of slice) { + const content = `## ${issue.number}: ${issue.title}\n\n${issue.body || ""}`; + fs.writeFileSync(path.join(searchDir, `issue-${issue.number}.md`), content, "utf8"); + } + core.info(`Saved ${slice.length} issues to ${searchDir}`); + } else { + // Code search: download matching files via GitHub REST API. + const maxCount = search.max > 0 ? search.max : 30; + core.info(`Searching GitHub code: "${search.query}" (max: ${maxCount})…`); + + const response = await client.rest.search.code({ + q: search.query, + per_page: Math.min(maxCount, 100), + }); + + let downloaded = 0; + for (const item of response.data.items) { + const fullNameParts = item.repository.full_name.split("/"); + if (fullNameParts.length < 2) continue; + const [owner, repo] = fullNameParts; + try { + const fileResp = await client.rest.repos.getContent({ + owner, + repo, + path: item.path, + }); + const data = /** @type {any} */ fileResp.data; + if (data.type === "file" && data.content) { + const fileContent = Buffer.from(data.content, "base64").toString("utf8"); + const safeName = `${owner}-${repo}-${item.path.replace(/\//g, "-")}`; + fs.writeFileSync(path.join(searchDir, safeName), fileContent, "utf8"); + downloaded++; + } + } catch (/** @type {any} */ err) { + core.warning(`Could not download ${item.repository.full_name}/${item.path}: ${err.message}`); + } + } + core.info(`Downloaded ${downloaded} files to ${searchDir}`); + } + + // Enforce minimum result count. 
+ if (search.min > 0) { + const fileCount = fs.readdirSync(searchDir).length; + if (fileCount < search.min) { + core.setFailed(`qmd search "${collectionName}" returned ${fileCount} results, minimum is ${search.min}`); + return; + } + } + + collections[collectionName] = { + path: searchDir, + pattern: "**/*", + }; + } + + // ── Create store and build index ───────────────────────────────────────── + core.info(`Creating qmd store at ${dbPath}…`); + + const store = await createStore({ dbPath, config: { collections } }); + + let updateResult = null; + let embedResult = null; + + try { + core.info("Indexing files (update)…"); + updateResult = await store.update({ + onProgress: (/** @type {{ collection: string, file: string, current: number, total: number }} */ info) => { + if (info.current % 50 === 0 || info.current === info.total) { + core.debug(`[${info.collection}] ${info.current}/${info.total}: ${info.file}`); + } + }, + }); + core.info(`Update complete: ${updateResult.indexed} indexed, ${updateResult.updated} updated, ` + `${updateResult.unchanged} unchanged, ${updateResult.removed} removed`); + + core.info("Generating embeddings (embed)…"); + embedResult = await store.embed({ + onProgress: (/** @type {{ current: number, total: number }} */ info) => { + if (info.current % 20 === 0 || info.current === info.total) { + core.debug(`Embedding ${info.current}/${info.total}`); + } + }, + }); + core.info(`Embed complete: ${embedResult.embedded} embedded`); + } finally { + await store.close(); + await writeSummary(config, updateResult, embedResult); + } + + core.info("qmd index built successfully"); +} + +module.exports = { main }; diff --git a/actions/setup/js/qmd_index.test.cjs b/actions/setup/js/qmd_index.test.cjs new file mode 100644 index 0000000000..682cdd0181 --- /dev/null +++ b/actions/setup/js/qmd_index.test.cjs @@ -0,0 +1,484 @@ +// @ts-check +/// +import { describe, it, expect, beforeEach, afterEach, beforeAll, afterAll, vi } from "vitest"; +import fs from "fs"; 
+import path from "path"; +import os from "os"; +import { createRequire } from "module"; + +// --- Fake @tobilu/qmd SDK setup ----------------------------------------------- +// +// qmd_index.cjs dynamically imports the SDK via: +// await import(pathToFileURL(path.join(__dirname, "node_modules/@tobilu/qmd/dist/index.js"))) +// +// When the real package is not installed we create a minimal fake ESM module at +// that path so the dynamic import succeeds. The fake createStore() returns +// globalThis.__qmdMockStore__, which is set fresh in beforeEach. +// +// Node's ES-module cache means the import() is only evaluated once across all +// tests. Updating globalThis.__qmdMockStore__ between tests is therefore the +// mechanism for giving each test a fresh mock store. +// +// If @tobilu/qmd is already installed (e.g. in a dedicated CI integration job) +// the fake is not created; the real SDK's createStore() would be called, but +// because this scenario is only encountered in a CI job that specifically +// installs the package, both the unit tests (fake SDK) and the integration CI +// job (real SDK with a minimal fixture) are covered. + +const SCRIPT_DIR = import.meta.dirname; +const SDK_DIST_DIR = path.join(SCRIPT_DIR, "node_modules", "@tobilu", "qmd", "dist"); +const SDK_PATH = path.join(SDK_DIST_DIR, "index.js"); +const SDK_PKG_PATH = path.join(SCRIPT_DIR, "node_modules", "@tobilu", "qmd", "package.json"); + +const sdkAlreadyInstalled = fs.existsSync(SDK_PATH); + +// Minimal ESM module that proxies through the per-test mock store global. +const FAKE_SDK_ESM = `export async function createStore() { + return globalThis.__qmdMockStore__; +} +`; +const FAKE_SDK_PKG = JSON.stringify({ type: "module", main: "dist/index.js" }); + +// --- Load module under test --------------------------------------------------- +// +// Load once; globals (core, github) and process.env are read at call time so +// changing them in beforeEach / afterEach affects each test independently. 
+ +const _require = createRequire(import.meta.url); +const { main } = _require("./qmd_index.cjs"); + +// --- Helpers ------------------------------------------------------------------ + +/** Creates a fresh mock store returned by the fake createStore(). */ +function makeMockStore() { + return { + update: vi.fn().mockResolvedValue({ indexed: 2, updated: 0, unchanged: 0, removed: 0 }), + embed: vi.fn().mockResolvedValue({ embedded: 2 }), + close: vi.fn().mockResolvedValue(undefined), + }; +} + +// --- Test suite --------------------------------------------------------------- + +describe("qmd_index.cjs", () => { + let mockCore; + let mockGithub; + let mockStore; + let tmpDir; + + // ── Global setup: create fake SDK if needed ─────────────────────────────── + beforeAll(() => { + if (!sdkAlreadyInstalled) { + // { recursive: true } creates all parent directories (including node_modules) + // so this works even in a fresh clone before npm install. + fs.mkdirSync(SDK_DIST_DIR, { recursive: true }); + fs.writeFileSync(SDK_PATH, FAKE_SDK_ESM, "utf8"); + fs.writeFileSync(SDK_PKG_PATH, FAKE_SDK_PKG, "utf8"); + } + }); + + afterAll(() => { + if (!sdkAlreadyInstalled) { + const tobiluScope = path.join(SCRIPT_DIR, "node_modules", "@tobilu"); + if (fs.existsSync(tobiluScope)) { + fs.rmSync(tobiluScope, { recursive: true, force: true }); + } + } + }); + + // ── Per-test setup ──────────────────────────────────────────────────────── + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "qmd-test-")); + + mockStore = makeMockStore(); + globalThis.__qmdMockStore__ = mockStore; + + mockCore = { + info: vi.fn(), + debug: vi.fn(), + warning: vi.fn(), + error: vi.fn(), + setFailed: vi.fn(), + summary: { + addRaw: vi.fn().mockReturnThis(), + write: vi.fn().mockResolvedValue(undefined), + }, + }; + + mockGithub = { + rest: { + issues: { listForRepo: vi.fn() }, + search: { + code: vi.fn().mockResolvedValue({ data: { items: [] } }), + }, + repos: { getContent: vi.fn() }, + }, 
+ paginate: vi.fn().mockResolvedValue([]), + }; + + global.core = mockCore; + global.github = mockGithub; + delete process.env.QMD_CONFIG_JSON; + delete process.env.GITHUB_REPOSITORY; + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + // Clean up qmd search dirs written to /tmp/gh-aw/qmd-search-N by the script. + // These paths are hardcoded in qmd_index.cjs (Linux-specific, mirrors the + // GitHub Actions runner environment). We clean indices 0-9 which covers + // all configs tested here (none use more than 2 search entries). + for (let i = 0; i < 10; i++) { + const d = `/tmp/gh-aw/qmd-search-${i}`; + if (fs.existsSync(d)) fs.rmSync(d, { recursive: true, force: true }); + } + delete globalThis.__qmdMockStore__; + delete global.core; + delete global.github; + vi.restoreAllMocks(); + }); + + // ── Helper ──────────────────────────────────────────────────────────────── + /** + * Sets QMD_CONFIG_JSON and invokes main(). + * @param {object | undefined} config Pass undefined to leave the env var unset. + */ + async function runMain(config) { + if (config !== undefined) { + process.env.QMD_CONFIG_JSON = JSON.stringify(config); + } + await main(); + } + + // ── Error path: missing config ───────────────────────────────────────────── + it("fails when QMD_CONFIG_JSON is not set", async () => { + await runMain(undefined); + expect(mockCore.setFailed).toHaveBeenCalledWith("QMD_CONFIG_JSON environment variable not set"); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Error path: SDK not installed ───────────────────────────────────────── + it("fails when @tobilu/qmd SDK is not found", async () => { + const realExistsSync = fs.existsSync.bind(fs); + vi.spyOn(fs, "existsSync").mockImplementation(p => { + // Use exact path comparison (same value the script computes) rather than + // substring search to avoid accidentally suppressing unrelated lookups. 
+ if (path.normalize(String(p)) === path.normalize(SDK_PATH)) return false; + return realExistsSync(p); + }); + + await runMain({ dbPath: path.join(tmpDir, "index") }); + + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("@tobilu/qmd not found at")); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Checkout collection: basic usage ────────────────────────────────────── + it("builds index from a checkout collection", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + fs.writeFileSync(path.join(docsDir, "readme.md"), "# README\nHello world"); + fs.writeFileSync(path.join(docsDir, "guide.md"), "# Guide\nFoo bar"); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir, patterns: ["**/*.md"], context: "Project docs" }], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + expect(mockStore.embed).toHaveBeenCalledOnce(); + expect(mockStore.close).toHaveBeenCalledOnce(); + }); + + // ── Checkout collection: env-var expansion ──────────────────────────────── + it("resolves ${ENV_VAR} placeholders in checkout paths", async () => { + const workspaceDir = path.join(tmpDir, "workspace"); + fs.mkdirSync(workspaceDir); + process.env.GITHUB_WORKSPACE = workspaceDir; + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: "${GITHUB_WORKSPACE}", patterns: ["**/*.md"] }], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + }); + + // ── Checkout collection: default pattern ───────────────────────────────── + it("uses **/*.md as the default pattern when none specified", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir }], + }); + + 
expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + }); + + // ── Issues search: valid repo ───────────────────────────────────────────── + it("fetches issues and saves them as markdown files", async () => { + process.env.GITHUB_REPOSITORY = "owner/repo"; + mockGithub.paginate.mockResolvedValue([ + { number: 1, title: "First issue", body: "Body one" }, + { number: 2, title: "Second issue", body: "Body two" }, + ]); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "issues", type: "issues", max: 10 }], + }); + + expect(mockGithub.paginate).toHaveBeenCalledOnce(); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + + // The script writes search results to /tmp/gh-aw/qmd-search-N (hardcoded in + // qmd_index.cjs, Linux-specific, mirrors the GitHub Actions runner). + const searchDir = "/tmp/gh-aw/qmd-search-0"; + if (fs.existsSync(searchDir)) { + const files = fs.readdirSync(searchDir); + expect(files).toContain("issue-1.md"); + expect(files).toContain("issue-2.md"); + const content = fs.readFileSync(path.join(searchDir, "issue-1.md"), "utf8"); + expect(content).toContain("## 1: First issue"); + } + }); + + // ── Issues search: explicit repo field ─────────────────────────────────── + it("uses explicit repo field instead of GITHUB_REPOSITORY for issues search", async () => { + process.env.GITHUB_REPOSITORY = "default/repo"; + mockGithub.paginate.mockResolvedValue([]); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "issues", type: "issues", repo: "explicit/repo" }], + }); + + expect(mockGithub.paginate).toHaveBeenCalledWith(expect.anything(), expect.objectContaining({ owner: "explicit", repo: "repo" }), expect.any(Function)); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + }); + + // ── Issues search: invalid slug (no slash) ─────────────────────────────── + it("fails when issues search repo slug 
has no slash", async () => { + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "issues", type: "issues", repo: "invalid-no-slash" }], + }); + + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining('invalid repository slug "invalid-no-slash"')); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Issues search: empty slug (GITHUB_REPOSITORY not set) ──────────────── + it("fails when issues search slug is empty (GITHUB_REPOSITORY unset)", async () => { + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "issues", type: "issues" }], + }); + + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("invalid repository slug")); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Issues search: min count enforcement ───────────────────────────────── + it("fails when issues search returns fewer results than min", async () => { + process.env.GITHUB_REPOSITORY = "owner/repo"; + mockGithub.paginate.mockResolvedValue([{ number: 1, title: "Only one", body: "" }]); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "issues", type: "issues", min: 5 }], + }); + + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("minimum is 5")); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Code search: downloads files ────────────────────────────────────────── + it("downloads code search results and registers them as a collection", async () => { + mockGithub.rest.search.code.mockResolvedValue({ + data: { + items: [ + { path: "docs/README.md", repository: { full_name: "owner/repo" } }, + { path: "docs/guide.md", repository: { full_name: "owner/repo" } }, + ], + }, + }); + mockGithub.rest.repos.getContent.mockResolvedValue({ + data: { type: "file", content: Buffer.from("# Content").toString("base64") }, + }); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "api-docs", query: 
"repo:owner/repo language:Markdown path:docs/", max: 10 }], + }); + + expect(mockGithub.rest.search.code).toHaveBeenCalledWith(expect.objectContaining({ q: "repo:owner/repo language:Markdown path:docs/" })); + expect(mockGithub.rest.repos.getContent).toHaveBeenCalledTimes(2); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + + // /tmp/gh-aw/qmd-search-0 is the hardcoded search dir in qmd_index.cjs. + const searchDir = "/tmp/gh-aw/qmd-search-0"; + if (fs.existsSync(searchDir)) { + const files = fs.readdirSync(searchDir); + expect(files.some(f => f.includes("owner-repo-docs-README.md"))).toBe(true); + expect(files.some(f => f.includes("owner-repo-docs-guide.md"))).toBe(true); + } + }); + + // ── Code search: min count enforcement ─────────────────────────────────── + it("fails when code search returns fewer results than min", async () => { + mockGithub.rest.search.code.mockResolvedValue({ data: { items: [] } }); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "docs", query: "repo:owner/repo", min: 3 }], + }); + + expect(mockCore.setFailed).toHaveBeenCalledWith(expect.stringContaining("minimum is 3")); + expect(mockStore.update).not.toHaveBeenCalled(); + }); + + // ── Code search: download error is a warning, not a failure ────────────── + it("emits a warning (not failure) when getContent throws for a code search item", async () => { + mockGithub.rest.search.code.mockResolvedValue({ + data: { + items: [{ path: "README.md", repository: { full_name: "owner/repo" } }], + }, + }); + mockGithub.rest.repos.getContent.mockRejectedValue(new Error("404 Not Found")); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "docs", query: "repo:owner/repo" }], + }); + + expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("Could not download owner/repo/README.md")); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + 
expect(mockStore.update).toHaveBeenCalledOnce(); + }); + + // ── Code search: skip items with malformed full_name ───────────────────── + it("skips code search items whose repository full_name has no slash", async () => { + mockGithub.rest.search.code.mockResolvedValue({ + data: { + items: [{ path: "file.md", repository: { full_name: "no-slash" } }], + }, + }); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "docs", query: "test" }], + }); + + expect(mockGithub.rest.repos.getContent).not.toHaveBeenCalled(); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + }); + + // ── Combined: checkouts + searches ─────────────────────────────────────── + it("combines checkout collections and search results into one index", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + fs.writeFileSync(path.join(docsDir, "readme.md"), "# README"); + + process.env.GITHUB_REPOSITORY = "owner/repo"; + mockGithub.paginate.mockResolvedValue([{ number: 10, title: "Issue", body: "" }]); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir, patterns: ["**/*.md"] }], + searches: [{ name: "issues", type: "issues", max: 50 }], + }); + + expect(mockCore.setFailed).not.toHaveBeenCalled(); + expect(mockStore.update).toHaveBeenCalledOnce(); + expect(mockStore.embed).toHaveBeenCalledOnce(); + expect(mockStore.close).toHaveBeenCalledOnce(); + }); + + // ── finally: store.close() always called ───────────────────────────────── + it("always calls store.close() even when store.update() throws", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + mockStore.update.mockRejectedValue(new Error("update failed")); + + await expect( + runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir }], + }) + ).rejects.toThrow("update failed"); + + 
expect(mockStore.close).toHaveBeenCalledOnce(); + }); + + // ── writeSummary: checkouts section ────────────────────────────────────── + it("writes step summary with a collections table for checkouts", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir, patterns: ["**/*.md", "**/*.mdx"], context: "Project docs" }], + }); + + const summaryText = mockCore.summary.addRaw.mock.calls.flat().join("\n"); + expect(summaryText).toContain("### Collections"); + expect(summaryText).toContain("| docs | **/*.md, **/*.mdx | Project docs |"); + expect(mockCore.summary.write).toHaveBeenCalledOnce(); + }); + + // ── writeSummary: searches section ─────────────────────────────────────── + it("writes step summary with a searches table", async () => { + mockGithub.rest.search.code.mockResolvedValue({ data: { items: [] } }); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + searches: [{ name: "api-docs", query: "repo:owner/repo language:Markdown", min: 0, max: 20 }], + }); + + const summaryText = mockCore.summary.addRaw.mock.calls.flat().join("\n"); + expect(summaryText).toContain("### Searches"); + expect(summaryText).toContain("| api-docs | code | repo:owner/repo language:Markdown |"); + }); + + // ── writeSummary: index stats section ──────────────────────────────────── + it("writes step summary with update and embed statistics", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + mockStore.update.mockResolvedValue({ indexed: 7, updated: 2, unchanged: 1, removed: 0 }); + mockStore.embed.mockResolvedValue({ embedded: 9 }); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir }], + }); + + const summaryText = mockCore.summary.addRaw.mock.calls.flat().join("\n"); + expect(summaryText).toContain("### Index stats"); + expect(summaryText).toContain("| Indexed | 7 
|"); + expect(summaryText).toContain("| Embedded | 9 |"); + }); + + // ── writeSummary: error handling ───────────────────────────────────────── + it("emits a warning (not failure) when writing the step summary throws", async () => { + const docsDir = path.join(tmpDir, "docs"); + fs.mkdirSync(docsDir); + mockCore.summary.write.mockRejectedValue(new Error("step summary unavailable")); + + await runMain({ + dbPath: path.join(tmpDir, "index"), + checkouts: [{ name: "docs", path: docsDir }], + }); + + expect(mockCore.warning).toHaveBeenCalledWith(expect.stringContaining("Could not write step summary")); + expect(mockCore.setFailed).not.toHaveBeenCalled(); + }); +}); diff --git a/actions/setup/md/qmd_prompt.md b/actions/setup/md/qmd_prompt.md new file mode 100644 index 0000000000..77af4d49bb --- /dev/null +++ b/actions/setup/md/qmd_prompt.md @@ -0,0 +1,15 @@ + +Use the qmd search tool to find relevant documentation files using vector similarity — it queries a local index built from the configured documentation globs. Read the returned file paths to get full content. 
+ +**Always use the qmd search tool first** when you need to find, verify, or search documentation: +- **Before using `find` or `bash` to list files** — use qmd search to discover the most relevant docs for a topic +- **Before writing new content** — search first to check whether documentation already exists +- **When identifying relevant files** — use it to narrow down which documentation pages cover a feature or concept +- **When understanding a term or concept** — query to find authoritative documentation describing it + +**Usage tips:** +- Use descriptive, natural language queries: e.g., `"how to configure MCP servers"` or `"safe-outputs create-pull-request options"` or `"permissions frontmatter field"` +- Always read the returned file paths to get the full content — the qmd search tool returns paths only, not content +- Combine multiple targeted queries rather than one broad query for better coverage +- A lower score threshold gives broader results; a higher one (e.g., `0.6`) returns only the most closely matching files + diff --git a/docs/src/content/docs/reference/qmd.md b/docs/src/content/docs/reference/qmd.md new file mode 100644 index 0000000000..d39910702d --- /dev/null +++ b/docs/src/content/docs/reference/qmd.md @@ -0,0 +1,196 @@ +--- +title: QMD Documentation Search +description: Build a local vector search index over documentation files and expose it as an MCP tool so agents can find relevant docs without contents:read permission in the agent job. +sidebar: + order: 730 +--- + +import { Aside } from "@astrojs/starlight/components"; + + + +The `qmd:` tool integrates [tobi/qmd](https://github.com/tobi/qmd) as a built-in MCP server that performs **vector similarity search** over documentation files. The search index is built in a dedicated `indexing` job (which has `contents: read`) and shared with the agent job via `actions/cache`, so the agent job does not need `contents: read`. + +## How it works + +1. 
**Indexing job** — installs `@tobilu/qmd`, registers documentation collections from configured checkouts and/or GitHub searches, builds the vector index, and saves it to `actions/cache`. +2. **Agent job** — restores the qmd cache (index and models) and starts qmd as an MCP server (`qmd mcp --http`). The agent can call the `search` tool to find relevant documentation files by natural language query. + +The embedding models used to build and query the index are automatically cached in both jobs via `actions/cache` (keyed by OS at `~/.cache/qmd/models/`), so models are only downloaded once per runner OS. + +## Quick start + +```aw wrap +--- +tools: + qmd: + checkouts: + - name: docs + paths: + - docs/**/*.md + - .github/**/*.md +--- +``` + +This indexes all markdown files under `docs/` and `.github/` in the current repository. + +## Configuration + +### Checkouts form + +Index files from one or more named collections, each with an optional repository checkout: + +```yaml wrap +tools: + qmd: + checkouts: + - name: current-docs + paths: + - docs/**/*.md + context: "Project documentation" + - name: other-repo-docs + paths: + - docs/**/*.md + context: "Documentation for owner/other-repo" + checkout: + repository: owner/other-repo + ref: main + path: ./other-repo # optional; defaults to /tmp/gh-aw/qmd-checkout- +``` + +Each `checkout:` entry accepts the same options as the top-level [`checkout:`](/gh-aw/reference/frontmatter/#checkout) field: `repository`, `ref`, `path`, `token`, `fetch-depth`, `sparse-checkout`, `submodules`, and `lfs`. + +The optional `context:` field provides additional hints to the agent about the collection's content (e.g. product area, audience, or version). 
+ +### Searches form + +Download files returned by GitHub code search and add them to the index: + +```yaml wrap +tools: + qmd: + searches: + - query: "repo:owner/repo language:Markdown path:docs/" + min: 1 # fail the activation job if fewer results (default: 0) + max: 30 # download at most this many files (default: 30) + github-token: ${{ secrets.GITHUB_TOKEN }} +``` + +Each search entry runs `gh search code ` in the activation job, downloads every matching file via the GitHub API, and registers the result as a separate qmd collection named `search-0`, `search-1`, etc. + +Use `github-app:` instead of `github-token:` for cross-organization access: + +```yaml wrap +tools: + qmd: + searches: + - query: "org:myorg language:Markdown path:docs/" + github-app: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} +``` + +### Cache key + +Persist the index in GitHub Actions cache to speed up subsequent runs. On a cache hit all indexing steps are skipped automatically: + +```yaml wrap +tools: + qmd: + checkouts: + - name: docs + paths: [docs/**/*.md] + cache-key: "qmd-index-${{ hashFiles('docs/**') }}" +``` + +#### Read-only mode + +When `cache-key` is set without any indexing sources (`checkouts` or `searches`), the tool operates in **read-only mode**: the activation job restores the index from cache (failing silently if the cache does not exist yet) and skips all Node.js, npm, and qmd build steps entirely. 
This is useful for maintaining a shared, pre-built documentation database: + +```yaml wrap +tools: + qmd: + cache-key: "qmd-index-v1" +``` + +### Combined form + +All sources can be combined in a single configuration: + +```yaml wrap +tools: + qmd: + checkouts: + - name: local-docs + paths: [docs/**/*.md] + context: "Project documentation" + - name: sdk-docs + paths: [README.md, docs/**/*.md] + context: "SDK reference" + checkout: + repository: owner/sdk + path: ./sdk + searches: + - query: "org:myorg language:Markdown path:wiki/" + max: 50 + github-token: ${{ secrets.GITHUB_TOKEN }} + cache-key: "qmd-index-${{ hashFiles('docs/**') }}" +``` + +## Configuration reference + +### `qmd:` fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `checkouts` | `QmdDocCollection[]` | No | Named collections, each with optional per-collection checkout. | +| `searches` | `QmdSearchEntry[]` | No | GitHub code search queries whose results are downloaded and indexed. | +| `cache-key` | `string` | No | GitHub Actions cache key for persisting the index across runs. When set without sources, enables read-only mode. | + +### `QmdDocCollection` fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | `string` | No | Collection identifier (defaults to `"docs-"`). | +| `paths` | `string[]` | No | Glob patterns for files to include (defaults to `**/*.md`). | +| `context` | `string` | No | Optional context hint for the agent about this collection's content (e.g. `"GitHub Actions documentation"`). | +| `checkout` | `CheckoutConfig` | No | Repository checkout options — same syntax as the top-level [`checkout:`](/gh-aw/reference/frontmatter/#checkout) field. Defaults to the current repository. 
| + +### `QmdSearchEntry` fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `query` | `string` | Yes | GitHub code search query string (e.g., `"repo:owner/repo language:Markdown"`). | +| `min` | `int` | No | Minimum number of results required; fails the activation job if not met (default: `0`). | +| `max` | `int` | No | Maximum number of files to download (default: `30`). | +| `github-token` | `string` | No | GitHub token for authenticated search (e.g., `${{ secrets.GITHUB_TOKEN }}`). | +| `github-app` | `GitHubAppConfig` | No | GitHub App credentials for cross-organization access. | + +## Permissions + +The `qmd` tool does **not** require `contents: read` in the agent job. All file access happens in the activation job, which already has that permission. + +```yaml wrap +# No extra permissions needed for the agent job +permissions: + contents: read # activation job only — already present by default +``` + +## Agent usage + +When qmd is active, the agent's system prompt instructs it to use the `search` tool before falling back to file listing or `bash`. Example queries: + +- `"how to configure MCP servers"` — finds docs about MCP setup +- `"safe-outputs create-pull-request options"` — finds safe-output option reference +- `"permissions frontmatter field"` — finds permission configuration docs + +The tool returns file paths ranked by relevance. Use standard file reading to fetch full content. 
+ +## Related Documentation + +- [Tools](/gh-aw/reference/tools/) - Overview of all built-in tools +- [Frontmatter](/gh-aw/reference/frontmatter/#checkout) - Top-level checkout configuration +- [Permissions](/gh-aw/reference/permissions/) - GitHub Actions permission configuration +- [Dependabot](/gh-aw/reference/dependabot/) - Automatic dependency updates (tracks `@tobilu/qmd` version) diff --git a/docs/src/content/docs/reference/tools.md b/docs/src/content/docs/reference/tools.md index 70b1e3a188..810671e6b2 100644 --- a/docs/src/content/docs/reference/tools.md +++ b/docs/src/content/docs/reference/tools.md @@ -100,6 +100,20 @@ tools: See **[Repo Memory Reference](/gh-aw/reference/repo-memory/)** for complete configuration options and usage examples. +### QMD Documentation Search (`qmd:`) — Experimental + +Build a local vector search index over documentation files and expose it as an MCP search tool. The index is built in a dedicated indexing job (no `contents: read` needed in the agent job): + +```yaml wrap +tools: + qmd: + checkouts: + - paths: + - docs/**/*.md +``` + +See **[QMD Reference](/gh-aw/reference/qmd/)** for complete configuration options, checkout support, GitHub search integration, and cache key usage. + ### Introspection on Agentic Workflows (`agentic-workflows:`) Provides workflow introspection, log analysis, and debugging tools. 
Requires `actions: read` permission: @@ -147,6 +161,7 @@ mcp-servers: - [Playwright](/gh-aw/reference/playwright/) - Browser automation and testing configuration - [Cache Memory](/gh-aw/reference/cache-memory/) - Persistent memory across workflow runs - [Repo Memory](/gh-aw/reference/repo-memory/) - Repository-specific memory storage +- [QMD Documentation Search](/gh-aw/reference/qmd/) - Vector similarity search over documentation files - [MCP Scripts](/gh-aw/reference/mcp-scripts/) - Define custom inline tools with JavaScript or shell scripts - [Frontmatter](/gh-aw/reference/frontmatter/) - All frontmatter configuration options - [Network Permissions](/gh-aw/reference/network/) - Network access control for AI engines diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 77b2cb2907..bcde5cec66 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -236,6 +236,12 @@ const ( // DefaultMCPInspectorPort is the default port for the MCP inspector (safe-outputs server) DefaultMCPInspectorPort = 3001 + // DefaultQmdMCPPort is the TCP port for the qmd HTTP MCP server started in the agent job. + // qmd runs as a Docker container (node:24) with `qmd mcp --http --port PORT`; using HTTP + // transport avoids node-llama-cpp's direct process.stdout writes (dot-progress during model + // loading) from corrupting the stdio JSON-RPC stream. + DefaultQmdMCPPort = 8181 + // MinNetworkPort is the minimum valid network port number MinNetworkPort = 1 @@ -424,6 +430,13 @@ const DefaultAPMVersion Version = "v0.8.4" // DefaultPlaywrightMCPVersion is the default version of the @playwright/mcp package const DefaultPlaywrightMCPVersion Version = "0.0.68" +// DefaultQmdVersion is the default version of the @tobilu/qmd npm package +const DefaultQmdVersion Version = "2.0.1" + +// DefaultQmdIndexingRunnerImage is the default runner image for the qmd indexing job. 
+// Uses the GPU-enabled T4 runner image so node-llama-cpp can leverage the GPU during embedding. +const DefaultQmdIndexingRunnerImage = "aw-gpu-runner-T4" + // DefaultPlaywrightBrowserVersion is the default version of the Playwright browser Docker image const DefaultPlaywrightBrowserVersion Version = "v1.58.2" @@ -624,6 +637,7 @@ var DangerousPropertyNames = []string{ const AgentJobName JobName = "agent" const ActivationJobName JobName = "activation" +const IndexingJobName JobName = "indexing" const PreActivationJobName JobName = "pre_activation" const DetectionJobName JobName = "detection" const SafeOutputArtifactName = "safe-output" diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index fc1dff4be0..4ae390c7c1 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -322,7 +322,7 @@ { "type": "array", "minItems": 1, - "description": "Array of label names — any of these labels will trigger the workflow.", + "description": "Array of label names \u2014 any of these labels will trigger the workflow.", "items": { "type": "string", "minLength": 1, @@ -343,7 +343,7 @@ { "type": "array", "minItems": 1, - "description": "Array of label names — any of these labels will trigger the workflow.", + "description": "Array of label names \u2014 any of these labels will trigger the workflow.", "items": { "type": "string", "minLength": 1, @@ -1506,12 +1506,12 @@ "description": "Skip workflow execution for specific GitHub users. Useful for preventing workflows from running for specific accounts (e.g., bots, specific team members)." }, "roles": { - "description": "Repository access roles required to trigger agentic workflows. Defaults to ['admin', 'maintainer', 'write'] for security. Use 'all' to allow any authenticated user (⚠️ security consideration).", + "description": "Repository access roles required to trigger agentic workflows. 
Defaults to ['admin', 'maintainer', 'write'] for security. Use 'all' to allow any authenticated user (\u26a0\ufe0f security consideration).", "oneOf": [ { "type": "string", "enum": ["all"], - "description": "Allow any authenticated user to trigger the workflow (⚠️ disables permission checking entirely - use with caution)" + "description": "Allow any authenticated user to trigger the workflow (\u26a0\ufe0f disables permission checking entirely - use with caution)" }, { "type": "array", @@ -1864,7 +1864,7 @@ "vulnerability-alerts": { "type": "string", "enum": ["read", "write", "none"], - "description": "Permission level for Dependabot vulnerability alerts (read/write/none). GitHub App-only permission: required to access Dependabot alerts via the GitHub MCP server. The GITHUB_TOKEN does not have this permission — a GitHub App must be configured." + "description": "Permission level for Dependabot vulnerability alerts (read/write/none). GitHub App-only permission: required to access Dependabot alerts via the GitHub MCP server. The GITHUB_TOKEN does not have this permission \u2014 a GitHub App must be configured." }, "all": { "type": "string", @@ -2491,7 +2491,7 @@ }, "network": { "$comment": "Strict mode requirements: When strict=true, the 'network' field must be present (not null/undefined) and cannot contain standalone wildcard '*' in allowed domains (but patterns like '*.example.com' ARE allowed). This is validated in Go code (pkg/workflow/strict_mode_validation.go) via validateStrictNetwork().", - "description": "Network access control for AI engines using ecosystem identifiers and domain allowlists. Supports wildcard patterns like '*.example.com' to match any subdomain. Controls web fetch and search capabilities. IMPORTANT: For workflows that build/install/test code, always include the language ecosystem identifier alongside 'defaults' — 'defaults' alone only covers basic infrastructure, not package registries. 
Key ecosystem identifiers by runtime: 'dotnet' (.NET/NuGet), 'python' (pip/PyPI), 'node' (npm/yarn), 'go' (go modules), 'java' (Maven/Gradle), 'ruby' (Bundler), 'rust' (Cargo), 'swift' (Swift PM). Example: a .NET project needs network: { allowed: [defaults, dotnet] }.", + "description": "Network access control for AI engines using ecosystem identifiers and domain allowlists. Supports wildcard patterns like '*.example.com' to match any subdomain. Controls web fetch and search capabilities. IMPORTANT: For workflows that build/install/test code, always include the language ecosystem identifier alongside 'defaults' \u2014 'defaults' alone only covers basic infrastructure, not package registries. Key ecosystem identifiers by runtime: 'dotnet' (.NET/NuGet), 'python' (pip/PyPI), 'node' (npm/yarn), 'go' (go modules), 'java' (Maven/Gradle), 'ruby' (Bundler), 'rust' (Cargo), 'swift' (Swift PM). Example: a .NET project needs network: { allowed: [defaults, dotnet] }.", "examples": [ "defaults", { @@ -2967,7 +2967,7 @@ [ { "name": "Verify Post-Steps Execution", - "run": "echo \"✅ Post-steps are executing correctly\"\necho \"This step runs after the AI agent completes\"\n" + "run": "echo \"\u2705 Post-steps are executing correctly\"\necho \"This step runs after the AI agent completes\"\n" }, { "name": "Upload Test Results", @@ -3422,6 +3422,100 @@ ], "examples": [true, null] }, + "qmd": { + "description": "qmd documentation search tool (https://github.com/tobi/qmd). Builds a local vector search index in a dedicated indexing job and shares it with the agent job via GitHub Actions cache. The agent job mounts a search MCP server over the pre-built index and does not need contents:read permission.", + "type": "object", + "properties": { + "checkouts": { + "type": "array", + "description": "List of named documentation collections built from checked-out repositories. 
Each entry can optionally specify its own checkout configuration to target a different repository.", + "items": { + "$ref": "#/$defs/qmdCollection" + }, + "minItems": 1 + }, + "searches": { + "type": "array", + "description": "List of GitHub search queries whose results are downloaded and added to the qmd index.", + "items": { + "$ref": "#/$defs/qmdSearchEntry" + }, + "minItems": 1 + }, + "cache-key": { + "type": "string", + "description": "GitHub Actions cache key used to persist the qmd index across workflow runs. When set without any indexing sources (checkouts/searches), qmd operates in read-only mode: the index is restored from cache and all indexing steps are skipped.", + "examples": ["qmd-index-${{ hashFiles('docs/**') }}", "qmd-index-v1"] + }, + "gpu": { + "type": "boolean", + "description": "Enable GPU acceleration for the embedding model (node-llama-cpp). Defaults to false: NODE_LLAMA_CPP_GPU=false is injected into the indexing step so GPU probing is skipped on CPU-only runners. Set to true only when the indexing runner has a GPU.", + "default": false + }, + "runs-on": { + "type": "string", + "description": "Override the runner image for the qmd indexing job. Defaults to the same runner as the agent job. Use this when the indexing job requires a different runner (e.g. 
a GPU runner).", + "examples": ["ubuntu-latest", "ubuntu-latest-gpu", "self-hosted"] + } + }, + "additionalProperties": false, + "examples": [ + { + "checkouts": [ + { + "name": "current-docs", + "paths": ["docs/**/*.md"] + }, + { + "name": "other-docs", + "paths": ["docs/**/*.md"], + "context": "Documentation for owner/other-repo", + "checkout": { + "repository": "owner/other-repo", + "path": "./other-repo" + } + } + ] + }, + { + "searches": [ + { + "query": "repo:owner/repo language:Markdown path:docs/", + "min": 1, + "max": 30, + "github-token": "${{ secrets.GITHUB_TOKEN }}" + } + ] + }, + { + "checkouts": [ + { + "name": "local-docs", + "paths": ["docs/**/*.md"] + } + ], + "searches": [ + { + "query": "org:myorg language:Markdown", + "max": 50, + "github-token": "${{ secrets.GITHUB_TOKEN }}" + } + ] + }, + { + "cache-key": "qmd-index-${{ hashFiles('docs/**') }}" + }, + { + "checkouts": [ + { + "name": "docs", + "paths": ["docs/**/*.md"] + } + ], + "cache-key": "qmd-index-${{ hashFiles('docs/**') }}" + } + ] + }, "cache-memory": { "description": "Cache memory MCP configuration for persistent memory storage", "oneOf": [ @@ -5582,7 +5676,7 @@ "items": { "type": "string" }, - "description": "Exclusive allowlist of glob patterns. When set, every file in the patch must match at least one pattern — files outside the list are always refused, including normal source files. This is a restriction, not an exception: setting allowed-files: [\".github/workflows/*\"] blocks all other files. To allow multiple sets of files, list all patterns explicitly. Acts independently of the protected-files policy; both checks must pass. To modify a protected file, it must both match allowed-files and be permitted by protected-files (e.g. protected-files: allowed). Supports * (any characters except /) and ** (any characters including /)." + "description": "Exclusive allowlist of glob patterns. 
When set, every file in the patch must match at least one pattern \u2014 files outside the list are always refused, including normal source files. This is a restriction, not an exception: setting allowed-files: [\".github/workflows/*\"] blocks all other files. To allow multiple sets of files, list all patterns explicitly. Acts independently of the protected-files policy; both checks must pass. To modify a protected file, it must both match allowed-files and be permitted by protected-files (e.g. protected-files: allowed). Supports * (any characters except /) and ** (any characters including /)." }, "preserve-branch-name": { "type": "boolean", @@ -5817,7 +5911,7 @@ "oneOf": [ { "type": "object", - "description": "Configuration for resolving review threads on pull requests. Resolution is scoped to the triggering PR only — threads on other PRs cannot be resolved.", + "description": "Configuration for resolving review threads on pull requests. Resolution is scoped to the triggering PR only \u2014 threads on other PRs cannot be resolved.", "properties": { "max": { "description": "Maximum number of review threads to resolve (default: 10) Supports integer or GitHub Actions expression (e.g. '${{ inputs.max }}').", @@ -6731,7 +6825,7 @@ "items": { "type": "string" }, - "description": "Exclusive allowlist of glob patterns. When set, every file in the patch must match at least one pattern — files outside the list are always refused, including normal source files. This is a restriction, not an exception: setting allowed-files: [\".github/workflows/*\"] blocks all other files. To allow multiple sets of files, list all patterns explicitly. Acts independently of the protected-files policy; both checks must pass. To modify a protected file, it must both match allowed-files and be permitted by protected-files (e.g. protected-files: allowed). Supports * (any characters except /) and ** (any characters including /)." + "description": "Exclusive allowlist of glob patterns. 
When set, every file in the patch must match at least one pattern \u2014 files outside the list are always refused, including normal source files. This is a restriction, not an exception: setting allowed-files: [\".github/workflows/*\"] blocks all other files. To allow multiple sets of files, list all patterns explicitly. Acts independently of the protected-files policy; both checks must pass. To modify a protected file, it must both match allowed-files and be permitted by protected-files (e.g. protected-files: allowed). Supports * (any characters except /) and ** (any characters including /)." }, "excluded-files": { "type": "array", @@ -7597,7 +7691,7 @@ }, "scripts": { "type": "object", - "description": "Inline JavaScript script handlers that run inside the consolidated safe-outputs job handler loop. Unlike 'jobs' (which create separate GitHub Actions jobs), scripts execute in-process alongside the built-in handlers. Users write only the body of the main function — the compiler wraps it with 'async function main(config = {}) { ... }' and 'module.exports = { main };' automatically. Script names containing dashes will be automatically normalized to underscores (e.g., 'post-slack-message' becomes 'post_slack_message').", + "description": "Inline JavaScript script handlers that run inside the consolidated safe-outputs job handler loop. Unlike 'jobs' (which create separate GitHub Actions jobs), scripts execute in-process alongside the built-in handlers. Users write only the body of the main function \u2014 the compiler wraps it with 'async function main(config = {}) { ... }' and 'module.exports = { main };' automatically. Script names containing dashes will be automatically normalized to underscores (e.g., 'post-slack-message' becomes 'post_slack_message').", "patternProperties": { "^[a-zA-Z_][a-zA-Z0-9_-]*$": { "type": "object", @@ -7655,7 +7749,7 @@ }, "script": { "type": "string", - "description": "JavaScript handler body. 
Write only the code that runs inside the handler for each item — the compiler generates the full outer wrapper including config input destructuring (`const { channel, message } = config;`) and the handler function (`return async function handleX(item, resolvedTemporaryIds) { ... }`). The body has access to `item` (runtime message with input values), `resolvedTemporaryIds` (map of temporary IDs), and config-destructured local variables for each declared input." + "description": "JavaScript handler body. Write only the code that runs inside the handler for each item \u2014 the compiler generates the full outer wrapper including config input destructuring (`const { channel, message } = config;`) and the handler function (`return async function handleX(item, resolvedTemporaryIds) { ... }`). The body has access to `item` (runtime message with input values), `resolvedTemporaryIds` (map of temporary IDs), and config-destructured local variables for each declared input." } }, "required": ["script"], @@ -7690,8 +7784,8 @@ }, "staged-title": { "type": "string", - "description": "Custom title template for staged mode preview. Available placeholders: {operation}. Example: '🎭 Preview: {operation}'", - "examples": ["🎭 Preview: {operation}", "## Staged Mode: {operation}"] + "description": "Custom title template for staged mode preview. Available placeholders: {operation}. Example: '\ud83c\udfad Preview: {operation}'", + "examples": ["\ud83c\udfad Preview: {operation}", "## Staged Mode: {operation}"] }, "staged-description": { "type": "string", @@ -7705,18 +7799,18 @@ }, "run-success": { "type": "string", - "description": "Custom message template for successful workflow completion. Available placeholders: {workflow_name}, {run_url}. 
Default: '✅ Agentic [{workflow_name}]({run_url}) completed successfully.'", - "examples": ["✅ Agentic [{workflow_name}]({run_url}) completed successfully.", "✅ [{workflow_name}]({run_url}) finished."] + "description": "Custom message template for successful workflow completion. Available placeholders: {workflow_name}, {run_url}. Default: '\u2705 Agentic [{workflow_name}]({run_url}) completed successfully.'", + "examples": ["\u2705 Agentic [{workflow_name}]({run_url}) completed successfully.", "\u2705 [{workflow_name}]({run_url}) finished."] }, "run-failure": { "type": "string", - "description": "Custom message template for failed workflow. Available placeholders: {workflow_name}, {run_url}, {status}. Default: '❌ Agentic [{workflow_name}]({run_url}) {status} and wasn't able to produce a result.'", - "examples": ["❌ Agentic [{workflow_name}]({run_url}) {status} and wasn't able to produce a result.", "❌ [{workflow_name}]({run_url}) {status}."] + "description": "Custom message template for failed workflow. Available placeholders: {workflow_name}, {run_url}, {status}. Default: '\u274c Agentic [{workflow_name}]({run_url}) {status} and wasn't able to produce a result.'", + "examples": ["\u274c Agentic [{workflow_name}]({run_url}) {status} and wasn't able to produce a result.", "\u274c [{workflow_name}]({run_url}) {status}."] }, "detection-failure": { "type": "string", - "description": "Custom message template for detection job failure. Available placeholders: {workflow_name}, {run_url}. Default: '⚠️ Security scanning failed for [{workflow_name}]({run_url}). Review the logs for details.'", - "examples": ["⚠️ Security scanning failed for [{workflow_name}]({run_url}). Review the logs for details.", "⚠️ Detection job failed in [{workflow_name}]({run_url})."] + "description": "Custom message template for detection job failure. Available placeholders: {workflow_name}, {run_url}. Default: '\u26a0\ufe0f Security scanning failed for [{workflow_name}]({run_url}). 
Review the logs for details.'", + "examples": ["\u26a0\ufe0f Security scanning failed for [{workflow_name}]({run_url}). Review the logs for details.", "\u26a0\ufe0f Detection job failed in [{workflow_name}]({run_url})."] }, "agent-failure-issue": { "type": "string", @@ -8362,7 +8456,7 @@ }, "github-token": { "type": "string", - "description": "GitHub token expression to authenticate APM with private package repositories. Uses cascading fallback (GH_AW_PLUGINS_TOKEN → GH_AW_GITHUB_TOKEN → GITHUB_TOKEN) when not specified. Takes effect unless github-app is also configured (which takes precedence).", + "description": "GitHub token expression to authenticate APM with private package repositories. Uses cascading fallback (GH_AW_PLUGINS_TOKEN \u2192 GH_AW_GITHUB_TOKEN \u2192 GITHUB_TOKEN) when not specified. Takes effect unless github-app is also configured (which takes precedence).", "examples": ["${{ secrets.MY_TOKEN }}", "${{ secrets.GH_AW_GITHUB_TOKEN }}"] } }, @@ -8938,7 +9032,7 @@ }, "auth": { "type": "array", - "description": "Authentication bindings — maps logical roles (e.g. 'api-key') to GitHub Actions secret names", + "description": "Authentication bindings \u2014 maps logical roles (e.g. 'api-key') to GitHub Actions secret names", "items": { "type": "object", "properties": { @@ -9348,6 +9442,76 @@ "examples": [["*"], ["refs/pulls/open/*"], ["main", "feature/my-branch"], ["feature/*"]] } } + }, + "qmdCollection": { + "type": "object", + "description": "A named documentation collection for the qmd tool, built from a checked-out repository. Each collection can optionally target a different repository via its own checkout configuration.", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Collection identifier used in the qmd index. Defaults to 'docs-' for multiple collections." 
+ }, + "paths": { + "type": "array", + "description": "List of glob patterns for documentation files to include in this collection.", + "items": { + "type": "string" + }, + "minItems": 1, + "examples": [["docs/**/*.md", ".github/**/*.md"]] + }, + "context": { + "type": "string", + "description": "Optional context injected into the qmd collection, providing the agent with additional hints about the content (e.g. 'GitHub Actions documentation')." + }, + "checkout": { + "$ref": "#/$defs/checkoutConfig", + "description": "Optional checkout configuration for this collection. When set, the specified repository is checked out and its files are indexed. Defaults to the current repository if not set." + } + } + }, + "qmdSearchEntry": { + "type": "object", + "description": "A GitHub search entry for the qmd tool. Supports code search (type: code, default) and GitHub issue list (type: issues). Results are downloaded and indexed as qmd collections.", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Optional name for the qmd collection. Defaults to \"search-{index}\" when not set." + }, + "type": { + "type": "string", + "enum": ["code", "issues"], + "default": "code", + "description": "Search backend type. \"code\" (default) uses gh search code to find repository files. \"issues\" uses gh issue list to fetch open GitHub issues and index them as markdown files." + }, + "query": { + "type": "string", + "description": "For type \"code\": GitHub code search query string. For type \"issues\": repository slug (\"owner/repo\"); defaults to the current repository when empty.", + "examples": ["repo:owner/repo language:Markdown path:docs/", "org:myorg language:Markdown", "owner/repo"] + }, + "min": { + "type": "integer", + "minimum": 0, + "description": "Minimum number of results required. If fewer results are returned the activation job fails." 
+ }, + "max": { + "type": "integer", + "minimum": 1, + "default": 30, + "description": "Maximum number of search results to download. Defaults to 30." + }, + "github-token": { + "type": "string", + "description": "GitHub token used to authenticate the search API request. Mutually exclusive with github-app.", + "examples": ["${{ secrets.GITHUB_TOKEN }}", "${{ secrets.MY_PAT }}"] + }, + "github-app": { + "$ref": "#/$defs/github_app", + "description": "GitHub App configuration used to mint a token for the search API request. Mutually exclusive with github-token." + } + } } } } diff --git a/pkg/workflow/claude_tools.go b/pkg/workflow/claude_tools.go index 176d2670eb..10fb611071 100644 --- a/pkg/workflow/claude_tools.go +++ b/pkg/workflow/claude_tools.go @@ -340,11 +340,10 @@ func (e *ClaudeEngine) computeAllowedClaudeToolsString(tools map[string]any, saf allowedTools = append(allowedTools, "mcp__github__"+defaultTool) } } - } else if toolName == "serena" { - // Serena uses a language-based config (not standard MCP type/url/command fields), - // so hasMCPConfig returns false. Add the server wildcard so Claude can use all - // Serena tools (find_symbol, activate_project, etc.). - allowedTools = append(allowedTools, "mcp__serena") + } else if toolName == "serena" || toolName == "qmd" { + // Serena and qmd use non-standard config shapes (not standard MCP type/url/command fields), + // so hasMCPConfig returns false. Add the server wildcard so Claude can use all tools. 
+ allowedTools = append(allowedTools, "mcp__"+toolName) } else if toolName == "playwright" || isCustomMCP { // Handle playwright and custom MCP tools with generic parsing if allowed, hasAllowed := mcpConfig["allowed"]; hasAllowed { diff --git a/pkg/workflow/codex_mcp.go b/pkg/workflow/codex_mcp.go index 0ca76a52ea..950d5fcebd 100644 --- a/pkg/workflow/codex_mcp.go +++ b/pkg/workflow/codex_mcp.go @@ -52,6 +52,9 @@ func (e *CodexEngine) RenderMCPConfig(yaml *strings.Builder, tools map[string]an case "playwright": playwrightTool := expandedTools["playwright"] renderer.RenderPlaywrightMCP(yaml, playwrightTool) + case "qmd": + qmdTool := expandedTools["qmd"] + renderer.RenderQmdMCP(yaml, qmdTool, workflowData) case "serena": serenaTool := expandedTools["serena"] renderer.RenderSerenaMCP(yaml, serenaTool) diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index b941355f43..6ccde4b7f0 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -250,6 +250,12 @@ func (c *Compiler) validateWorkflowData(workflowData *WorkflowData, markdownPath c.IncrementWarningCount() } + // Emit experimental warning for qmd documentation search feature + if workflowData.QmdConfig != nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Using experimental feature: qmd")) + c.IncrementWarningCount() + } + // Emit experimental warning for dependencies (APM) feature if workflowData.APMDependencies != nil && len(workflowData.APMDependencies.Packages) > 0 { fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Using experimental feature: dependencies (APM)")) diff --git a/pkg/workflow/compiler_activation_job.go b/pkg/workflow/compiler_activation_job.go index 6f192029c3..d78a9aa7ad 100644 --- a/pkg/workflow/compiler_activation_job.go +++ b/pkg/workflow/compiler_activation_job.go @@ -485,6 +485,10 @@ func (c *Compiler) buildActivationJob(data *WorkflowData, preActivationJobCreate } } + // qmd indexing is handled by the separate "indexing" job that depends on 
activation. + // That job builds the index and saves/restores it via the GitHub Actions cache, and the agent job + // restores the index using actions/cache/restore. + // Upload aw_info.json and prompt.txt as the activation artifact for the agent job to download. // In workflow_call context the artifact is prefixed to avoid name clashes when multiple callers // invoke the same reusable workflow within the same parent workflow run. diff --git a/pkg/workflow/compiler_jobs.go b/pkg/workflow/compiler_jobs.go index 160d0285b2..56e154c8a7 100644 --- a/pkg/workflow/compiler_jobs.go +++ b/pkg/workflow/compiler_jobs.go @@ -212,6 +212,15 @@ func (c *Compiler) buildJobs(data *WorkflowData, markdownPath string) error { return err } + // Build qmd indexing job if the qmd tool is configured. + // This separate job depends on activation and builds the documentation search index. + // The agent job then depends on this indexing job to download the pre-built index. + if data.QmdConfig != nil { + if err := c.buildQmdIndexingJobWrapper(data); err != nil { + return err + } + } + // Build main workflow job if err := c.buildMainJobWrapper(data, activationJobCreated); err != nil { return err @@ -306,6 +315,20 @@ func (c *Compiler) buildMainJobWrapper(data *WorkflowData, activationJobCreated return nil } +// buildQmdIndexingJobWrapper builds the qmd indexing job and adds it to the job manager. +func (c *Compiler) buildQmdIndexingJobWrapper(data *WorkflowData) error { + compilerJobsLog.Print("Building qmd indexing job") + indexingJob, err := c.buildQmdIndexingJob(data) + if err != nil { + return fmt.Errorf("failed to build indexing job: %w", err) + } + if err := c.jobManager.AddJob(indexingJob); err != nil { + return fmt.Errorf("failed to add indexing job: %w", err) + } + compilerJobsLog.Printf("Successfully added indexing job: %s", string(constants.IndexingJobName)) + return nil +} + // buildMemoryManagementJobs builds memory management jobs (push_repo_memory and update_cache_memory). 
// These jobs handle artifact-based memory persistence to git branches and GitHub Actions cache. func (c *Compiler) buildMemoryManagementJobs(data *WorkflowData) error { diff --git a/pkg/workflow/compiler_main_job.go b/pkg/workflow/compiler_main_job.go index 68760e3a71..b2189c04f6 100644 --- a/pkg/workflow/compiler_main_job.go +++ b/pkg/workflow/compiler_main_job.go @@ -78,6 +78,15 @@ func (c *Compiler) buildMainJob(data *WorkflowData, activationJobCreated bool) ( depends = []string{string(constants.ActivationJobName)} // Depend on the activation job only if it exists } + // When the qmd tool is configured, the agent also depends on the indexing job (which builds + // the qmd search index). The indexing job depends on activation, but GitHub Actions only + // exposes outputs from DIRECT dependencies, so we must keep activation in needs too so that + // needs.activation.outputs.* expressions resolve correctly. + if data.QmdConfig != nil { + depends = append(depends, string(constants.IndexingJobName)) + compilerMainJobLog.Print("Agent job depends on indexing job (qmd tool configured)") + } + // Add custom jobs as dependencies only if they don't depend on pre_activation or agent // Custom jobs that depend on pre_activation are now dependencies of activation, // so the agent job gets them transitively through activation diff --git a/pkg/workflow/compiler_orchestrator_workflow.go b/pkg/workflow/compiler_orchestrator_workflow.go index 58cec6aab9..8f4ee08638 100644 --- a/pkg/workflow/compiler_orchestrator_workflow.go +++ b/pkg/workflow/compiler_orchestrator_workflow.go @@ -721,6 +721,11 @@ func (c *Compiler) extractAdditionalConfigurations( } workflowData.RepoMemoryConfig = repoMemoryConfig + // Extract qmd config from parsed tools + if toolsConfig.Qmd != nil { + workflowData.QmdConfig = toolsConfig.Qmd + } + // Extract and process mcp-scripts and safe-outputs workflowData.Command, workflowData.CommandEvents = c.extractCommandConfig(frontmatter) workflowData.LabelCommand, 
workflowData.LabelCommandEvents, workflowData.LabelCommandRemoveLabel = c.extractLabelCommandConfig(frontmatter) diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go index 443545570a..0abaee08bf 100644 --- a/pkg/workflow/compiler_types.go +++ b/pkg/workflow/compiler_types.go @@ -415,6 +415,7 @@ type WorkflowData struct { RateLimit *RateLimitConfig // rate limiting configuration for workflow triggers CacheMemoryConfig *CacheMemoryConfig // parsed cache-memory configuration RepoMemoryConfig *RepoMemoryConfig // parsed repo-memory configuration + QmdConfig *QmdToolConfig // parsed qmd tool configuration (docs globs) Runtimes map[string]any // runtime version overrides from frontmatter APMDependencies *APMDependenciesInfo // APM (Agent Package Manager) dependency packages to install ToolsTimeout int // timeout in seconds for tool/MCP operations (0 = use engine default) diff --git a/pkg/workflow/compiler_yaml_main_job.go b/pkg/workflow/compiler_yaml_main_job.go index 581d1ab9bc..ad2730460a 100644 --- a/pkg/workflow/compiler_yaml_main_job.go +++ b/pkg/workflow/compiler_yaml_main_job.go @@ -279,6 +279,20 @@ func (c *Compiler) generateMainJobSteps(yaml *strings.Builder, data *WorkflowDat } } + // Restore qmd index and models cache if qmd tool is configured. + // The index was built and cached in the indexing job; we restore it using the precise + // cache key so we always get the index from the current workflow run. + // The models cache restores the embedding model weights (cross-platform GGUF files) that + // the gateway-managed qmd container mounts from ${HOME}/.cache/qmd/. + // Note: the node-llama-cpp binary cache is NOT restored here; the container downloads + // the appropriate prebuilt binary for its own OS on first use. 
+ if data.QmdConfig != nil { + compilerYamlLog.Print("Adding qmd index exact-key cache restore step") + yaml.WriteString(generateQmdIndexCacheRestoreExactStep(data.QmdConfig)) + compilerYamlLog.Print("Adding qmd models cache restore step (read-only)") + yaml.WriteString(generateQmdModelsCacheRestoreStep()) + } + // GH_AW_SAFE_OUTPUTS is now set at job level, no setup step needed // Add GitHub MCP lockdown detection step if needed diff --git a/pkg/workflow/dependabot.go b/pkg/workflow/dependabot.go index cdf29c95ef..b257b873eb 100644 --- a/pkg/workflow/dependabot.go +++ b/pkg/workflow/dependabot.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/github/gh-aw/pkg/console" + "github.com/github/gh-aw/pkg/constants" "github.com/github/gh-aw/pkg/logger" "github.com/goccy/go-yaml" ) @@ -175,6 +176,12 @@ func (c *Compiler) collectNpmDependencies(workflowDataList []*WorkflowData) []Np dep := parseNpmPackage(pkg) depMap[dep.Name] = dep.Version } + + // Track qmd builtin package version when qmd tool is configured + if workflowData.QmdConfig != nil { + depMap["@tobilu/qmd"] = string(constants.DefaultQmdVersion) + dependabotLog.Print("Added @tobilu/qmd builtin package to npm dependencies") + } } // Convert map to sorted slice diff --git a/pkg/workflow/mcp_config_validation.go b/pkg/workflow/mcp_config_validation.go index d8e03e69c5..fab2aebacb 100644 --- a/pkg/workflow/mcp_config_validation.go +++ b/pkg/workflow/mcp_config_validation.go @@ -64,6 +64,7 @@ func ValidateMCPConfigs(tools map[string]any) error { builtInTools := map[string]bool{ "github": true, "playwright": true, + "qmd": true, "serena": true, "agentic-workflows": true, "cache-memory": true, diff --git a/pkg/workflow/mcp_environment.go b/pkg/workflow/mcp_environment.go index 41190f3782..fd4039f644 100644 --- a/pkg/workflow/mcp_environment.go +++ b/pkg/workflow/mcp_environment.go @@ -20,6 +20,7 @@ // - Safe Outputs: GH_AW_SAFE_OUTPUTS_*, GH_AW_ASSETS_* // - MCP Scripts: GH_AW_MCP_SCRIPTS_PORT, 
GH_AW_MCP_SCRIPTS_API_KEY // - Serena: GH_AW_SERENA_PORT (local mode only) +// - qmd: env vars are set directly in the "Start QMD MCP Server" Docker step (not via gateway) // - Playwright: Secrets from custom args expressions // - HTTP MCP: Custom secrets from headers and env sections // @@ -124,6 +125,11 @@ func collectMCPEnvironmentVariables(tools map[string]any, mcpTools []string, wor envVars["GH_AW_SAFE_OUTPUTS_API_KEY"] = "${{ steps.safe-outputs-start.outputs.api_key }}" } + // qmd env vars (INDEX_PATH, NODE_LLAMA_CPP_GPU) are no longer added to the gateway + // environment. qmd now runs as a separate Docker container started by the + // "Start QMD MCP Server" step (see qmd.go:generateQmdStartStep), and the gateway + // connects to it via HTTP. The env vars are set directly in that Docker start step. + // Check for agentic-workflows GITHUB_TOKEN if hasAgenticWorkflows { envVars["GITHUB_TOKEN"] = "${{ secrets.GITHUB_TOKEN }}" diff --git a/pkg/workflow/mcp_renderer.go b/pkg/workflow/mcp_renderer.go index 2fc03bdec2..c28065597e 100644 --- a/pkg/workflow/mcp_renderer.go +++ b/pkg/workflow/mcp_renderer.go @@ -144,6 +144,11 @@ func RenderJSONMCPConfig( case "playwright": playwrightTool := tools["playwright"] options.Renderers.RenderPlaywright(&configBuilder, playwrightTool, isLast) + case "qmd": + qmdTool := tools["qmd"] + if options.Renderers.RenderQmd != nil { + options.Renderers.RenderQmd(&configBuilder, qmdTool, isLast, workflowData) + } case "serena": serenaTool := tools["serena"] options.Renderers.RenderSerena(&configBuilder, serenaTool, isLast) diff --git a/pkg/workflow/mcp_renderer_builtin.go b/pkg/workflow/mcp_renderer_builtin.go index ee0777fadd..c126bbddf9 100644 --- a/pkg/workflow/mcp_renderer_builtin.go +++ b/pkg/workflow/mcp_renderer_builtin.go @@ -73,6 +73,84 @@ func (r *MCPConfigRendererUnified) renderPlaywrightTOML(yaml *strings.Builder, p yaml.WriteString(" mounts = [\"/tmp/gh-aw/mcp-logs:/tmp/gh-aw/mcp-logs:rw\"]\n") } +// RenderQmdMCP generates 
the qmd documentation search MCP server configuration. +// qmd runs natively on the host VM with HTTP transport, started by the +// "Start QMD MCP Server" step before the gateway, so the gateway connects via HTTP. +// Using HTTP transport avoids node-llama-cpp's direct process.stdout writes (dot-progress +// during model loading) from corrupting the stdio JSON-RPC stream. +func (r *MCPConfigRendererUnified) RenderQmdMCP(yaml *strings.Builder, qmdTool any, workflowData *WorkflowData) { + mcpRendererLog.Printf("Rendering qmd MCP: format=%s, inline_args=%t", r.options.Format, r.options.InlineArgs) + + if r.options.Format == "toml" { + r.renderQmdTOML(yaml, workflowData) + // Add guard policies for TOML format as a separate section + if len(r.options.WriteSinkGuardPolicies) > 0 { + mcpRendererLog.Print("Adding guard-policies to qmd TOML (derived from GitHub guard-policy)") + renderGuardPoliciesToml(yaml, r.options.WriteSinkGuardPolicies, "qmd") + } + return + } + + // JSON format + renderQmdMCPConfigWithOptions(yaml, r.options.IsLast, r.options.IncludeCopilotFields, r.options.WriteSinkGuardPolicies, workflowData) +} + +// resolveQmdHost returns the hostname the gateway should use to reach the qmd HTTP server. +// qmd runs natively on the host VM, so port DefaultQmdMCPPort is bound on the host network. +// When the agent sandbox is enabled (default), the gateway runs inside a Docker container +// with its own network namespace and must reach the host via host.docker.internal. +// When the agent sandbox is disabled (agent.disabled: true), the gateway also runs on +// the host, so localhost is sufficient. +func resolveQmdHost(workflowData *WorkflowData) string { + if workflowData != nil && workflowData.SandboxConfig != nil && + workflowData.SandboxConfig.Agent != nil && workflowData.SandboxConfig.Agent.Disabled { + return "localhost" + } + return "host.docker.internal" +} + +// qmdMCPURL returns the full HTTP MCP URL for the qmd server. 
+func qmdMCPURL(workflowData *WorkflowData) string { + host := resolveQmdHost(workflowData) + return "http://" + host + ":" + strconv.Itoa(constants.DefaultQmdMCPPort) + "/mcp" +} + +// renderQmdTOML generates qmd MCP configuration in TOML format using HTTP transport. +// qmd is started natively before the gateway (see generateQmdStartStep), +// and the gateway connects to the qmd HTTP MCP server at DefaultQmdMCPPort/mcp. +func (r *MCPConfigRendererUnified) renderQmdTOML(yaml *strings.Builder, workflowData *WorkflowData) { + mcpRendererBuiltinLog.Print("Rendering qmd MCP in TOML format (HTTP transport)") + + url := qmdMCPURL(workflowData) + + yaml.WriteString(" \n") + yaml.WriteString(" [mcp_servers.qmd]\n") + yaml.WriteString(" type = \"http\"\n") + yaml.WriteString(" url = \"" + url + "\"\n") +} + +// renderQmdMCPConfigWithOptions generates the qmd MCP server configuration in JSON format. +// qmd uses HTTP transport (server started before the gateway), so only the URL is needed. +func renderQmdMCPConfigWithOptions(yaml *strings.Builder, isLast bool, includeCopilotFields bool, guardPolicies map[string]any, workflowData *WorkflowData) { + url := qmdMCPURL(workflowData) + + yaml.WriteString(" \"qmd\": {\n") + yaml.WriteString(" \"type\": \"http\",\n") + + if len(guardPolicies) > 0 { + yaml.WriteString(" \"url\": \"" + url + "\",\n") + renderGuardPoliciesJSON(yaml, guardPolicies, " ") + } else { + yaml.WriteString(" \"url\": \"" + url + "\"\n") + } + + if isLast { + yaml.WriteString(" }\n") + } else { + yaml.WriteString(" },\n") + } +} + // RenderSerenaMCP generates Serena MCP server configuration func (r *MCPConfigRendererUnified) RenderSerenaMCP(yaml *strings.Builder, serenaTool any) { mcpRendererLog.Printf("Rendering Serena MCP: format=%s, inline_args=%t", r.options.Format, r.options.InlineArgs) diff --git a/pkg/workflow/mcp_renderer_helpers.go b/pkg/workflow/mcp_renderer_helpers.go index d2419183c2..f7c313d8a4 100644 --- a/pkg/workflow/mcp_renderer_helpers.go +++ 
b/pkg/workflow/mcp_renderer_helpers.go @@ -78,6 +78,9 @@ func buildStandardJSONMCPRenderers( RenderPlaywright: func(yaml *strings.Builder, playwrightTool any, isLast bool) { createRenderer(isLast).RenderPlaywrightMCP(yaml, playwrightTool) }, + RenderQmd: func(yaml *strings.Builder, qmdTool any, isLast bool, workflowData *WorkflowData) { + createRenderer(isLast).RenderQmdMCP(yaml, qmdTool, workflowData) + }, RenderSerena: func(yaml *strings.Builder, serenaTool any, isLast bool) { createRenderer(isLast).RenderSerenaMCP(yaml, serenaTool) }, diff --git a/pkg/workflow/mcp_renderer_types.go b/pkg/workflow/mcp_renderer_types.go index 1833b2f675..ddd7ecbf63 100644 --- a/pkg/workflow/mcp_renderer_types.go +++ b/pkg/workflow/mcp_renderer_types.go @@ -34,6 +34,7 @@ type RenderCustomMCPToolConfigHandler func(yaml *strings.Builder, toolName strin type MCPToolRenderers struct { RenderGitHub func(yaml *strings.Builder, githubTool any, isLast bool, workflowData *WorkflowData) RenderPlaywright func(yaml *strings.Builder, playwrightTool any, isLast bool) + RenderQmd func(yaml *strings.Builder, qmdTool any, isLast bool, workflowData *WorkflowData) RenderSerena func(yaml *strings.Builder, serenaTool any, isLast bool) RenderCacheMemory func(yaml *strings.Builder, isLast bool, workflowData *WorkflowData) RenderAgenticWorkflows func(yaml *strings.Builder, isLast bool) diff --git a/pkg/workflow/mcp_setup_generator.go b/pkg/workflow/mcp_setup_generator.go index f9cc9af6ee..aeed760aa5 100644 --- a/pkg/workflow/mcp_setup_generator.go +++ b/pkg/workflow/mcp_setup_generator.go @@ -95,7 +95,7 @@ func (c *Compiler) generateMCPSetup(yaml *strings.Builder, tools map[string]any, continue } // Standard MCP tools - if toolName == "github" || toolName == "playwright" || toolName == "serena" || toolName == "cache-memory" || toolName == "agentic-workflows" { + if toolName == "github" || toolName == "playwright" || toolName == "qmd" || toolName == "serena" || toolName == "cache-memory" || toolName == 
"agentic-workflows" { mcpTools = append(mcpTools, toolName) } else if mcpConfig, ok := toolValue.(map[string]any); ok { // Check if it's explicitly marked as MCP type in the new format @@ -467,6 +467,13 @@ func (c *Compiler) generateMCPSetup(yaml *strings.Builder, tools map[string]any, yaml.WriteString(" \n") } + // Start the qmd MCP HTTP server natively if qmd is configured. + // qmd must run on the host VM (not in Docker) because node-llama-cpp compiles + // platform-native binaries. HTTP mode keeps MCP traffic on TCP, separate from stdout. + if workflowData != nil && workflowData.QmdConfig != nil { + yaml.WriteString(generateQmdStartStep(workflowData.QmdConfig)) + } + // The MCP gateway is always enabled, even when agent sandbox is disabled // Use the engine's RenderMCPConfig method yaml.WriteString(" - name: Start MCP Gateway\n") diff --git a/pkg/workflow/prompt_constants.go b/pkg/workflow/prompt_constants.go index 642955b592..a29a759da8 100644 --- a/pkg/workflow/prompt_constants.go +++ b/pkg/workflow/prompt_constants.go @@ -12,6 +12,7 @@ const ( prContextPromptFile = "pr_context_prompt.md" tempFolderPromptFile = "temp_folder_prompt.md" playwrightPromptFile = "playwright_prompt.md" + qmdPromptFile = "qmd_prompt.md" markdownPromptFile = "markdown.md" xpiaPromptFile = "xpia.md" cacheMemoryPromptFile = "cache_memory_prompt.md" diff --git a/pkg/workflow/qmd.go b/pkg/workflow/qmd.go new file mode 100644 index 0000000000..6c2df28013 --- /dev/null +++ b/pkg/workflow/qmd.go @@ -0,0 +1,617 @@ +// Package workflow provides qmd documentation search tool integration. +// +// # QMD Tool +// +// This file handles the qmd (https://github.com/tobi/qmd) builtin tool integration. +// qmd provides local vector search over documentation files using the @tobilu/qmd npm package. +// +// The integration has three phases: +// +// 1. Activation job: runs the normal activation steps (timestamp check, prompt, reactions, etc.). +// Does NOT build the qmd index. +// +// 2. 
Indexing job (new): runs after activation, builds the search index from configured
+//	checkouts and/or GitHub searches, and saves it to GitHub Actions cache.
+//	This job has contents:read permission so the agent job does NOT need it.
+//	The index is built by a single actions/github-script step that runs qmd_index.cjs,
+//	which uses the @tobilu/qmd JavaScript SDK to build the collections.
+//
+// 3. Agent job: depends on BOTH the activation job (for its outputs) and the indexing job
+//	(for the qmd index cache). Restores the pre-built index from cache using the precise
+//	cache key and mounts the qmd MCP server pointing at it.
+//
+// # Configuration
+//
+// Two sources can populate the index:
+//
+//	- checkouts: glob-based collections from checked-out repositories (each optionally with
+//	  its own checkout config to target a different repo)
+//	- searches: GitHub search queries whose results are downloaded and added to the index
+//
+// Optionally, a cache-key can be set to persist the index in GitHub Actions cache:
+//
+//	- cache-key only (read-only mode): the index is restored from cache; no indexing steps run
+//	- cache-key + sources: index is built if cache miss, then saved to cache for future runs
+//
+// Example frontmatter:
+//
+//	tools:
+//	  qmd:
+//	    checkouts:
+//	      - name: docs
+//	        paths:
+//	          - docs/**/*.md
+//	    searches:
+//	      - query: "repo:owner/repo language:Markdown path:docs/"
+//	        min: 1
+//	        max: 30
+//	        github-token: ${{ secrets.GITHUB_TOKEN }}
+//	    cache-key: "qmd-index-${{ hashFiles('docs/**') }}"
+//
+// # Cache lifecycle
+//
+// The index is always stored in GitHub Actions cache. The default cache key is
+// gh-aw-qmd-<qmd-version>-${{ github.run_id }} (ephemeral per run; see resolveQmdCacheKey).
+// The agent job restores from
+// the exact same key that the indexing job saved, so no artifact upload/download is needed
+// +// Related files: +// - tools_types.go: QmdToolConfig, QmdDocCollection, QmdSearchEntry types +// - tools_parser.go: parseQmdTool / parseQmdDocCollection / parseQmdSearchEntry +// - mcp_renderer_builtin.go: RenderQmdMCP method +// - mcp_setup_generator.go: generateQmdStartStep (agent job HTTP server startup) +// - compiler_jobs.go: buildQmdIndexingJobWrapper +// - compiler_yaml_main_job.go: agent job qmd cache restore +// - actions/setup/js/qmd_index.cjs: JavaScript SDK implementation + +package workflow + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + + "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/logger" +) + +var qmdLog = logger.New("workflow:qmd") + +// hasQmdTool checks if the qmd tool is enabled in the tools configuration. +func hasQmdTool(parsedTools *Tools) bool { + if parsedTools == nil { + return false + } + return parsedTools.Qmd != nil +} + +// qmdHasSources reports whether the qmd config has any indexing sources +// (checkouts or searches). When false and a cache-key is set, +// qmd operates in read-only mode: the index is restored from cache only. +func qmdHasSources(qmdConfig *QmdToolConfig) bool { + return len(qmdConfig.Checkouts) > 0 || len(qmdConfig.Searches) > 0 +} + +// generateQmdStartStep generates two GitHub Actions steps that set up and start the qmd MCP +// server in HTTP mode natively on the runner VM, before the MCP gateway. +// +// qmd must run natively (not in Docker) because node-llama-cpp compiles platform-specific +// binaries that must match the runner's CPU/OS and cannot run inside a generic Docker image. +// +// Using HTTP transport (qmd mcp --http) avoids node-llama-cpp's direct process.stdout writes +// (e.g. dot-progress characters during model loading) from being mixed into the stdio +// JSON-RPC stream and causing "invalid character '\x1b' looking for beginning of value" +// parse errors in the gateway. 
With HTTP transport the MCP protocol travels over TCP, so +// qmd's stdout/stderr are completely independent of the protocol channel. +// +// The two steps are: +// 1. Setup Node.js – ensures node v24 is available before running npx. +// 2. Start QMD MCP Server – installs @tobilu/qmd via npx, starts the HTTP server as a +// background process, and polls /health (up to 120 s) before continuing. +// +// The gateway then connects to http://localhost:{port}/mcp. +func generateQmdStartStep(qmdConfig *QmdToolConfig) string { + version := string(constants.DefaultQmdVersion) + port := constants.DefaultQmdMCPPort + portStr := strconv.Itoa(port) + + var sb strings.Builder + + // Step 1: Setup Node.js (node:24 required by @tobilu/qmd) + sb.WriteString(" - name: Setup Node.js for qmd MCP server\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/setup-node")) + sb.WriteString(" with:\n") + fmt.Fprintf(&sb, " node-version: \"%s\"\n", string(constants.DefaultNodeVersion)) + + // Step 2: Start qmd natively + sb.WriteString(" - name: Start QMD MCP Server\n") + sb.WriteString(" id: qmd-mcp-start\n") + sb.WriteString(" env:\n") + sb.WriteString(" INDEX_PATH: /tmp/gh-aw/qmd-index/index.sqlite\n") + sb.WriteString(" NO_COLOR: '1'\n") + if !qmdConfig.GPU { + sb.WriteString(" NODE_LLAMA_CPP_GPU: 'false'\n") + } + sb.WriteString(" run: |\n") + sb.WriteString(" # Start qmd MCP server natively in HTTP mode.\n") + sb.WriteString(" # qmd must run on the host VM (not in Docker) because node-llama-cpp\n") + sb.WriteString(" # requires platform-native binaries that cannot run in a generic container.\n") + sb.WriteString(" # HTTP transport keeps MCP traffic on TCP, fully separate from stdout.\n") + sb.WriteString(" npx --yes --package @tobilu/qmd@" + version + " qmd mcp --http --port " + portStr + " \\\n") + sb.WriteString(" >> /tmp/qmd-mcp.log 2>&1 &\n") + sb.WriteString(" # Save PID for logs; the GitHub Actions runner terminates all processes at job end.\n") + sb.WriteString(" echo $! 
> /tmp/qmd-mcp.pid\n") + sb.WriteString(" \n") + sb.WriteString(" # Wait up to 120 s for the server to accept requests\n") + sb.WriteString(" echo 'Waiting for QMD MCP server on port " + portStr + "...'\n") + sb.WriteString(" for i in $(seq 1 60); do\n") + sb.WriteString(" if curl -sf http://localhost:" + portStr + "/health > /dev/null 2>&1; then\n") + sb.WriteString(" echo 'QMD MCP server is ready'\n") + sb.WriteString(" break\n") + sb.WriteString(" fi\n") + sb.WriteString(" if [ \"$i\" -eq 60 ]; then\n") + sb.WriteString(" echo 'ERROR: QMD MCP server failed to start within 120 s' >&2\n") + sb.WriteString(" cat /tmp/qmd-mcp.log 2>&1 || true\n") + sb.WriteString(" exit 1\n") + sb.WriteString(" fi\n") + sb.WriteString(" sleep 2\n") + sb.WriteString(" done\n") + sb.WriteString(" \n") + return sb.String() +} + +// generateQmdModelsCacheStep generates a step that caches the qmd embedding models directory +// (~/.cache/qmd/models/) using the actions/cache action (restore + post-save), keyed by OS +// and qmd version. This step should be emitted in the indexing job (before index building) to +// populate the cache. For the agent job, use generateQmdModelsCacheRestoreStep instead. +func generateQmdModelsCacheStep() string { + version := string(constants.DefaultQmdVersion) + var sb strings.Builder + sb.WriteString(" - name: Cache qmd models\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache")) + sb.WriteString(" with:\n") + sb.WriteString(" path: ~/.cache/qmd/models/\n") + fmt.Fprintf(&sb, " key: qmd-models-%s-${{ runner.os }}\n", version) + return sb.String() +} + +// generateQmdNodeLlamaCppCacheStep generates a step that caches the node-llama-cpp downloaded +// binaries (~/.cache/node-llama-cpp/) using the actions/cache action (restore + post-save). +// The cache key includes the qmd version, OS, CPU architecture, and runner image ID because +// node-llama-cpp binaries are compiled native code that must match the exact runner image platform. 
+// This step should be emitted in the indexing job. For the agent job, use +// generateQmdNodeLlamaCppCacheRestoreStep instead. +func generateQmdNodeLlamaCppCacheStep() string { + version := string(constants.DefaultQmdVersion) + var sb strings.Builder + sb.WriteString(" - name: Cache node-llama-cpp binaries\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache")) + sb.WriteString(" with:\n") + sb.WriteString(" path: ~/.cache/node-llama-cpp/\n") + fmt.Fprintf(&sb, " key: node-llama-cpp-%s-${{ runner.os }}-${{ runner.arch }}-${{ runner.imageid }}\n", version) + return sb.String() +} + +// generateQmdModelsCacheRestoreStep generates a read-only step that restores the qmd embedding +// models directory (~/.cache/qmd/models/) from GitHub Actions cache. It uses +// actions/cache/restore (restore-only, no post-save) so the agent job never writes to the +// shared cache — that is the indexing job's responsibility. +func generateQmdModelsCacheRestoreStep() string { + version := string(constants.DefaultQmdVersion) + var sb strings.Builder + sb.WriteString(" - name: Restore qmd models cache\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache/restore")) + sb.WriteString(" with:\n") + sb.WriteString(" path: ~/.cache/qmd/models/\n") + fmt.Fprintf(&sb, " key: qmd-models-%s-${{ runner.os }}\n", version) + return sb.String() +} + +// generateQmdIndexCacheRestoreExactStep generates a read-only restore step for the agent job +// that restores the qmd search index from Actions cache using the PRECISE cache key. +// No restore-keys fallback is used — the agent job must get the exact index that the +// indexing job saved in the current workflow run. 
+func generateQmdIndexCacheRestoreExactStep(qmdConfig *QmdToolConfig) string { + cacheKey := resolveQmdCacheKey(qmdConfig) + var sb strings.Builder + sb.WriteString(" - name: Restore qmd index from cache\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache/restore")) + sb.WriteString(" with:\n") + fmt.Fprintf(&sb, " key: %s\n", cacheKey) + sb.WriteString(" path: /tmp/gh-aw/qmd-index/\n") + return sb.String() +} + +// resolveQmdCacheKey returns the effective cache key for the qmd index. +// If the user specified an explicit cache-key, that is returned as-is. +// Otherwise a per-run key is generated using the GitHub workflow run ID so that +// the index built in the indexing job is always persisted to cache and the agent +// job can restore it from the cache without needing a separate artifact download on every run. +// +// The default key format is: gh-aw-qmd-- +// (e.g. "gh-aw-qmd-2.0.1-12345678") +func resolveQmdCacheKey(qmdConfig *QmdToolConfig) string { + if qmdConfig.CacheKey != "" { + return qmdConfig.CacheKey + } + return fmt.Sprintf("gh-aw-qmd-%s-${{ github.run_id }}", string(constants.DefaultQmdVersion)) +} + +// resolveQmdRestoreKeys returns the restore-keys prefix list for the qmd index cache. +// The restore keys allow a workflow run to reuse the most recently cached index +// (from a previous run) even when the exact key is not found, so the index can +// be updated incrementally rather than built from scratch every time. +// +// The prefix is derived by stripping the last ${{ ... }} expression from the cache key: +// +// "gh-aw-qmd-${{ github.run_id }}" → ["gh-aw-qmd-"] +// "qmd-index-${{ hashFiles('docs/**') }}" → ["qmd-index-"] +// +// When the key contains no expression suffix, no restore-keys are emitted. 
+func resolveQmdRestoreKeys(qmdConfig *QmdToolConfig) []string { + key := resolveQmdCacheKey(qmdConfig) + idx := strings.LastIndex(key, "${{") + if idx > 0 { + return []string{key[:idx]} + } + return nil +} + +func generateQmdCacheRestoreStep(qmdConfig *QmdToolConfig) string { + cacheKey := resolveQmdCacheKey(qmdConfig) + restoreKeys := resolveQmdRestoreKeys(qmdConfig) + var sb strings.Builder + sb.WriteString(" - name: Restore qmd index from cache\n") + sb.WriteString(" id: qmd-cache-restore\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache/restore")) + sb.WriteString(" with:\n") + fmt.Fprintf(&sb, " key: %s\n", cacheKey) + sb.WriteString(" path: /tmp/gh-aw/qmd-index/\n") + if len(restoreKeys) > 0 { + sb.WriteString(" restore-keys: |\n") + for _, rk := range restoreKeys { + fmt.Fprintf(&sb, " %s\n", rk) + } + } + return sb.String() +} + +// generateQmdCacheSaveStep generates an activation-job step that saves the qmd index to +// GitHub Actions cache. It only runs when the preceding cache-restore step was a miss. +func generateQmdCacheSaveStep(cacheKey string) string { + var sb strings.Builder + sb.WriteString(" - name: Save qmd index to cache\n") + sb.WriteString(" if: steps.qmd-cache-restore.outputs.cache-hit != 'true'\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/cache/save")) + sb.WriteString(" with:\n") + fmt.Fprintf(&sb, " key: %s\n", cacheKey) + sb.WriteString(" path: /tmp/gh-aw/qmd-index/\n") + return sb.String() +} + +// qmdCheckoutEntry is the JSON representation of a checkout-based collection +// passed to qmd_index.cjs via the QMD_CONFIG_JSON environment variable. +type qmdCheckoutEntry struct { + Name string `json:"name"` + Path string `json:"path"` + Patterns []string `json:"patterns,omitempty"` + Context string `json:"context,omitempty"` +} + +// qmdSearchEntry is the JSON representation of a search entry passed to qmd_index.cjs. 
+type qmdSearchEntry struct { + Name string `json:"name,omitempty"` + Type string `json:"type,omitempty"` // "code" (default) or "issues" + Query string `json:"query,omitempty"` // for "code" type + Repo string `json:"repo,omitempty"` // for "issues" type; blank = github.repository + Min int `json:"min,omitempty"` // minimum result count (0 = no minimum) + Max int `json:"max,omitempty"` // maximum result count (0 = use default) + TokenEnvVar string `json:"tokenEnvVar,omitempty"` // env var holding custom GitHub token +} + +// qmdBuildConfig is the top-level JSON config serialised into QMD_CONFIG_JSON +// and consumed by actions/setup/js/qmd_index.cjs. +type qmdBuildConfig struct { + DBPath string `json:"dbPath"` + Checkouts []qmdCheckoutEntry `json:"checkouts,omitempty"` + Searches []qmdSearchEntry `json:"searches,omitempty"` +} + +// resolveQmdWorkdir returns the working directory path for a checkout-based collection. +// Returns "${GITHUB_WORKSPACE}" for the default (current) repository, or the path +// specified / derived from the checkout config for external repositories. +func resolveQmdWorkdir(col *QmdDocCollection) string { + if col.Checkout == nil { + return "${GITHUB_WORKSPACE}" + } + if col.Checkout.Path != "" { + checkoutPath := strings.TrimPrefix(col.Checkout.Path, "./") + return "${GITHUB_WORKSPACE}/" + checkoutPath + } + name := col.Name + if name == "" { + name = "docs" + } + return "/tmp/gh-aw/qmd-checkout-" + name +} + +// buildQmdConfig constructs the qmdBuildConfig from the user-provided QmdToolConfig. 
+func buildQmdConfig(qmdConfig *QmdToolConfig) qmdBuildConfig { + cfg := qmdBuildConfig{ + DBPath: "/tmp/gh-aw/qmd-index", + } + + for _, col := range qmdConfig.Checkouts { + name := col.Name + if name == "" { + name = "docs" + } + entry := qmdCheckoutEntry{ + Name: name, + Path: resolveQmdWorkdir(col), + Context: col.Context, + } + if len(col.Paths) > 0 { + entry.Patterns = col.Paths + } + cfg.Checkouts = append(cfg.Checkouts, entry) + } + + for i, s := range qmdConfig.Searches { + name := s.Name + if name == "" { + name = fmt.Sprintf("search-%d", i) + } + entry := qmdSearchEntry{ + Name: name, + Type: s.Type, + Query: s.Query, + Min: s.Min, + Max: s.Max, + } + if s.Type == "issues" && s.Query != "" { + entry.Repo = s.Query + } + if s.GitHubToken != "" { + entry.TokenEnvVar = fmt.Sprintf("QMD_SEARCH_TOKEN_%d", i) + } + cfg.Searches = append(cfg.Searches, entry) + } + + return cfg +} + +// generateQmdCollectionCheckoutStep generates a checkout step YAML string for a qmd +// collection that targets a non-default repository. Returns an empty string when the +// collection uses the current repository (no checkout needed). 
+func generateQmdCollectionCheckoutStep(col *QmdDocCollection) string { + if col.Checkout == nil { + return "" + } + cfg := col.Checkout + + // Determine checkout path used in the runner filesystem + checkoutPath := cfg.Path + if checkoutPath == "" { + checkoutPath = "/tmp/gh-aw/qmd-checkout-" + col.Name + } + + var sb strings.Builder + fmt.Fprintf(&sb, " - name: Checkout %s for qmd\n", col.Name) + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/checkout")) + sb.WriteString(" with:\n") + sb.WriteString(" persist-credentials: false\n") + if cfg.Repository != "" { + fmt.Fprintf(&sb, " repository: %s\n", cfg.Repository) + } + if cfg.Ref != "" { + fmt.Fprintf(&sb, " ref: %s\n", cfg.Ref) + } + fmt.Fprintf(&sb, " path: %s\n", checkoutPath) + if cfg.GitHubToken != "" { + fmt.Fprintf(&sb, " token: %s\n", cfg.GitHubToken) + } + if cfg.FetchDepth != nil { + fmt.Fprintf(&sb, " fetch-depth: %d\n", *cfg.FetchDepth) + } + if cfg.SparseCheckout != "" { + sb.WriteString(" sparse-checkout: |\n") + for line := range strings.SplitSeq(strings.TrimRight(cfg.SparseCheckout, "\n"), "\n") { + fmt.Fprintf(&sb, " %s\n", strings.TrimSpace(line)) + } + } + if cfg.Submodules != "" { + fmt.Fprintf(&sb, " submodules: %s\n", cfg.Submodules) + } + if cfg.LFS { + sb.WriteString(" lfs: true\n") + } + return sb.String() +} + +// generateQmdIndexSteps generates the indexing job steps that install the @tobilu/qmd SDK, +// run the qmd_index.cjs JavaScript script to build the vector search index, and save it +// to GitHub Actions cache. +// +// The configuration is serialised to JSON and passed via the QMD_CONFIG_JSON environment +// variable to the github-script step. qmd_index.cjs uses the @tobilu/qmd SDK to: +// 1. Register checkout-based collections +// 2. Fetch GitHub search/issue results and register them as collections +// 3. 
Call store.update() and store.embed() to index and embed all documents +// +// A cache restore step is always emitted first using the resolved cache key (user-provided +// or the default per-run key gh-aw-qmd-${{ github.run_id }}). When qmdConfig.CacheKey is +// not set, the default run-scoped key means the cache is ephemeral (only used within a +// single workflow run). When qmdConfig.CacheKey IS set, the cache is durable across runs. +// +// Modes: +// - Read-only mode (cache-key set, no sources): only cache restore + cache save (skipped on hit). +// - Build mode (sources present): indexing steps are guarded by +// `if: steps.qmd-cache-restore.outputs.cache-hit != 'true'`, so they are skipped on a +// cache hit. A cache save step always follows. +func generateQmdIndexSteps(qmdConfig *QmdToolConfig) []string { + hasSources := qmdHasSources(qmdConfig) + isCacheOnlyMode := qmdConfig.CacheKey != "" && !hasSources + cacheKey := resolveQmdCacheKey(qmdConfig) + qmdLog.Printf("Generating qmd index steps: checkouts=%d searches=%d cacheKey=%q cacheOnly=%v", + len(qmdConfig.Checkouts), len(qmdConfig.Searches), cacheKey, isCacheOnlyMode) + + version := string(constants.DefaultQmdVersion) + var steps []string + + // Always restore from cache first; the step ID lets subsequent steps detect cache-hit. + steps = append(steps, generateQmdCacheRestoreStep(qmdConfig)) + + // Always cache qmd embedding models to avoid re-downloading on each run + // Cache qmd models and node-llama-cpp binaries in separate caches so they can be + // invalidated independently. The node-llama-cpp key also includes the CPU architecture + // because those binaries are compiled native code that must match the runner platform. 
+ steps = append(steps, generateQmdModelsCacheStep()) + steps = append(steps, generateQmdNodeLlamaCppCacheStep()) + + // Cache-only mode: no indexing at all — just use the restored cache + if isCacheOnlyMode { + qmdLog.Print("qmd cache-only mode: skipping indexing, using cache only") + } else { + // Build steps are skipped when the cache was already populated on a previous run. + ifCacheMiss := " if: steps.qmd-cache-restore.outputs.cache-hit != 'true'\n" + + // Setup Node.js (required to run the qmd SDK) + nodeSetup := " - name: Setup Node.js for qmd\n" + nodeSetup += ifCacheMiss + nodeSetup += fmt.Sprintf(" uses: %s\n", GetActionPin("actions/setup-node")) + nodeSetup += " with:\n" + nodeSetup += fmt.Sprintf(" node-version: \"%s\"\n", string(constants.DefaultNodeVersion)) + steps = append(steps, nodeSetup) + + // Install the @tobilu/qmd SDK into the gh-aw actions directory so qmd_index.cjs + // can require('@tobilu/qmd') via the adjacent node_modules folder. + npmInstall := " - name: Install @tobilu/qmd SDK\n" + npmInstall += ifCacheMiss + npmInstall += " run: |\n" + npmInstall += fmt.Sprintf(" npm install --prefix \"${{ runner.temp }}/gh-aw/actions\" @tobilu/qmd@%s @actions/github\n", version) + steps = append(steps, npmInstall) + + // Emit a checkout step for each collection that targets a non-default repository + for _, col := range qmdConfig.Checkouts { + if checkoutStep := generateQmdCollectionCheckoutStep(col); checkoutStep != "" { + steps = append(steps, checkoutStep) + } + } + + // Build the JSON configuration for qmd_index.cjs + cfg := buildQmdConfig(qmdConfig) + cfgJSON, err := json.Marshal(cfg) + if err != nil { + qmdLog.Printf("Failed to marshal qmd config: %v", err) + cfgJSON = []byte("{}") + } + + // Generate the github-script step that runs qmd_index.cjs + var scriptSB strings.Builder + scriptSB.WriteString(" - name: Build qmd index\n") + scriptSB.WriteString(ifCacheMiss) + fmt.Fprintf(&scriptSB, " uses: %s\n", GetActionPin("actions/github-script")) 
+ scriptSB.WriteString(" env:\n") + // Pass the config JSON as an env var; the YAML literal block avoids quoting issues + scriptSB.WriteString(" QMD_CONFIG_JSON: |\n") + fmt.Fprintf(&scriptSB, " %s\n", string(cfgJSON)) + // Disable GPU acceleration by default; only enable when the user explicitly opts in. + // This prevents node-llama-cpp from spending time probing GPU drivers on CPU runners. + if !qmdConfig.GPU { + scriptSB.WriteString(" NODE_LLAMA_CPP_GPU: \"false\"\n") + } + // Add per-search custom token env vars + for i, s := range qmdConfig.Searches { + if s.GitHubToken != "" { + fmt.Fprintf(&scriptSB, " QMD_SEARCH_TOKEN_%d: %s\n", i, s.GitHubToken) + } + } + scriptSB.WriteString(" with:\n") + scriptSB.WriteString(" github-token: ${{ github.token }}\n") + scriptSB.WriteString(" script: |\n") + fmt.Fprintf(&scriptSB, " const { setupGlobals } = require('%s/setup_globals.cjs');\n", SetupActionDestination) + scriptSB.WriteString(" setupGlobals(core, github, context, exec, io);\n") + fmt.Fprintf(&scriptSB, " const { main } = require('%s/qmd_index.cjs');\n", SetupActionDestination) + scriptSB.WriteString(" await main();\n") + steps = append(steps, scriptSB.String()) + + // Always save to cache (on build; skipped on cache hit by the save step condition). + steps = append(steps, generateQmdCacheSaveStep(cacheKey)) + } + + return steps +} + +// buildQmdIndexingJob builds a standalone "indexing" job that depends on the activation job +// and builds the qmd documentation search index. +// +// The job: +// 1. Checks out the actions folder (for the setup action scripts) +// 2. Runs the setup action to copy qmd_index.cjs and setup_globals.cjs to the runner +// 3. Optionally checks out the workspace for checkout-based collections +// 4. Installs @tobilu/qmd and @actions/github and runs qmd_index.cjs via actions/github-script +// 5. 
Saves the resulting index to GitHub Actions cache +// +// The agent job declares a needs dependency on this "indexing" job and restores the index from cache. +func (c *Compiler) buildQmdIndexingJob(data *WorkflowData) (*Job, error) { + qmdLog.Printf("Building qmd indexing job: checkouts=%d searches=%d cacheKey=%q", + len(data.QmdConfig.Checkouts), len(data.QmdConfig.Searches), data.QmdConfig.CacheKey) + + var steps []string + + // Check out the actions folder so the setup action scripts are available on the runner. + steps = append(steps, c.generateCheckoutActionsFolder(data)...) + + // Run the setup action to copy qmd_index.cjs and setup_globals.cjs to SetupActionDestination. + setupActionRef := c.resolveActionReference("./actions/setup", data) + steps = append(steps, c.generateSetupStep(setupActionRef, SetupActionDestination, false)...) + + // Check out the repository workspace if any checkout-based collection uses the default repo + // (i.e., no per-collection checkout config, meaning it relies on ${GITHUB_WORKSPACE}). + needsWorkspaceCheckout := false + for _, col := range data.QmdConfig.Checkouts { + if col.Checkout == nil { + needsWorkspaceCheckout = true + break + } + } + if needsWorkspaceCheckout { + var sb strings.Builder + sb.WriteString(" - name: Checkout repository for qmd indexing\n") + fmt.Fprintf(&sb, " uses: %s\n", GetActionPin("actions/checkout")) + sb.WriteString(" with:\n") + sb.WriteString(" persist-credentials: false\n") + steps = append(steps, sb.String()) + } + + // Generate all qmd index-building steps (cache restore/save, Node.js, SDK install, github-script). + qmdSteps := generateQmdIndexSteps(data.QmdConfig) + steps = append(steps, qmdSteps...) + + // The indexing job runs after the activation job to inherit the artifact prefix output. + needs := []string{string(constants.ActivationJobName)} + + // Permissions: contents:read is required to checkout files for index building. 
+ perms := NewPermissionsFromMap(map[PermissionScope]PermissionLevel{ + PermissionContents: PermissionRead, + }) + + // Determine the runner for the indexing job. + // Default to aw-gpu-runner-T4 for GPU-accelerated embedding; user can override via qmd.runs-on. + indexingRunsOn := "runs-on: " + constants.DefaultQmdIndexingRunnerImage + if data.QmdConfig.RunsOn != "" { + indexingRunsOn = "runs-on: " + data.QmdConfig.RunsOn + } + + job := &Job{ + Name: string(constants.IndexingJobName), + RunsOn: indexingRunsOn, + Permissions: perms.RenderToYAML(), + Steps: steps, + Needs: needs, + TimeoutMinutes: 60, // building the qmd index can take a while for large doc sets + } + + return job, nil +} diff --git a/pkg/workflow/tools_parser.go b/pkg/workflow/tools_parser.go index e8f2de7b55..5f4cab73b6 100644 --- a/pkg/workflow/tools_parser.go +++ b/pkg/workflow/tools_parser.go @@ -129,6 +129,9 @@ func NewTools(toolsMap map[string]any) *Tools { if val, exists := toolsMap["playwright"]; exists { tools.Playwright = parsePlaywrightTool(val) } + if val, exists := toolsMap["qmd"]; exists { + tools.Qmd = parseQmdTool(val) + } if val, exists := toolsMap["serena"]; exists { tools.Serena = parseSerenaTool(val) } @@ -156,6 +159,7 @@ func NewTools(toolsMap map[string]any) *Tools { "web-search": true, "edit": true, "playwright": true, + "qmd": true, "serena": true, "agentic-workflows": true, "cache-memory": true, @@ -173,7 +177,7 @@ func NewTools(toolsMap map[string]any) *Tools { } } - toolsParserLog.Printf("Parsed tools: github=%v, bash=%v, playwright=%v, serena=%v, custom=%d", tools.GitHub != nil, tools.Bash != nil, tools.Playwright != nil, tools.Serena != nil, customCount) + toolsParserLog.Printf("Parsed tools: github=%v, bash=%v, playwright=%v, qmd=%v, serena=%v, custom=%d", tools.GitHub != nil, tools.Bash != nil, tools.Playwright != nil, tools.Qmd != nil, tools.Serena != nil, customCount) return tools } @@ -406,6 +410,171 @@ func parsePlaywrightTool(val any) *PlaywrightToolConfig { 
return &PlaywrightToolConfig{} } +// parseQmdTool converts raw qmd tool configuration to QmdToolConfig. +// Supported fields: +// +// - checkouts: list of named collections (with optional checkout per entry) +// - searches: list of GitHub search queries +// - cache-key: optional GitHub Actions cache key +// - gpu: enable GPU acceleration for node-llama-cpp (default: false) +// - runs-on: override runner image for the indexing job +func parseQmdTool(val any) *QmdToolConfig { + if val == nil { + toolsParserLog.Print("qmd tool enabled with empty configuration") + return &QmdToolConfig{} + } + + if configMap, ok := val.(map[string]any); ok { + config := &QmdToolConfig{} + + // Handle cache-key field + if cacheKey, ok := configMap["cache-key"].(string); ok && cacheKey != "" { + config.CacheKey = cacheKey + toolsParserLog.Printf("qmd tool cache-key: %s", cacheKey) + } + + // Handle gpu field (defaults to false — GPU disabled by default) + if gpuVal, exists := configMap["gpu"]; exists { + if gpuBool, ok := gpuVal.(bool); ok { + config.GPU = gpuBool + toolsParserLog.Printf("qmd tool gpu: %v", gpuBool) + } + } + + // Handle runs-on field (override runner image for the indexing job) + if runsOnVal, exists := configMap["runs-on"]; exists { + if runsOnStr, ok := runsOnVal.(string); ok && runsOnStr != "" { + config.RunsOn = runsOnStr + toolsParserLog.Printf("qmd tool runs-on: %s", runsOnStr) + } + } + + // Handle checkouts field + if checkoutsValue, ok := configMap["checkouts"]; ok { + if arr, ok := checkoutsValue.([]any); ok { + config.Checkouts = make([]*QmdDocCollection, 0, len(arr)) + for i, item := range arr { + itemMap, ok := item.(map[string]any) + if !ok { + continue + } + col := parseQmdDocCollection(itemMap, i) + config.Checkouts = append(config.Checkouts, col) + } + toolsParserLog.Printf("qmd tool parsed %d checkouts", len(config.Checkouts)) + } + } + + // Handle searches field + if searchesValue, ok := configMap["searches"]; ok { + if arr, ok := 
searchesValue.([]any); ok { + config.Searches = make([]*QmdSearchEntry, 0, len(arr)) + for _, item := range arr { + itemMap, ok := item.(map[string]any) + if !ok { + continue + } + entry := parseQmdSearchEntry(itemMap) + config.Searches = append(config.Searches, entry) + } + toolsParserLog.Printf("qmd tool parsed %d searches", len(config.Searches)) + } + } + + return config + } + + return &QmdToolConfig{} +} + +// parseQmdDocCollection converts a raw map to a QmdDocCollection. +// The index parameter is used to generate a default name when none is provided. +func parseQmdDocCollection(m map[string]any, index int) *QmdDocCollection { + col := &QmdDocCollection{} + + if name, ok := m["name"].(string); ok && name != "" { + col.Name = name + } else { + col.Name = fmt.Sprintf("docs-%d", index) + } + + if pathsValue, ok := m["paths"]; ok { + if arr, ok := pathsValue.([]any); ok { + col.Paths = make([]string, 0, len(arr)) + for _, item := range arr { + if str, ok := item.(string); ok { + col.Paths = append(col.Paths, str) + } + } + } else if arr, ok := pathsValue.([]string); ok { + col.Paths = arr + } + } + + if context, ok := m["context"].(string); ok { + col.Context = context + } + + if checkoutValue, ok := m["checkout"]; ok { + if checkoutMap, ok := checkoutValue.(map[string]any); ok { + if cfg, err := checkoutConfigFromMap(checkoutMap); err == nil { + col.Checkout = cfg + } else { + toolsParserLog.Printf("qmd collection %q: ignoring invalid checkout config: %v", col.Name, err) + } + } + } + + return col +} + +// parseQmdSearchEntry converts a raw map to a QmdSearchEntry. 
+func parseQmdSearchEntry(m map[string]any) *QmdSearchEntry { + entry := &QmdSearchEntry{} + + if n, ok := m["name"].(string); ok { + entry.Name = n + } + if t, ok := m["type"].(string); ok { + entry.Type = t + } + if q, ok := m["query"].(string); ok { + entry.Query = q + } + entry.Min = parseYAMLInt(m["min"]) + entry.Max = parseYAMLInt(m["max"]) + + if token, ok := m["github-token"].(string); ok { + entry.GitHubToken = token + } + + if appMap, ok := m["github-app"].(map[string]any); ok { + entry.GitHubApp = parseAppConfig(appMap) + } + + return entry +} + +// parseYAMLInt converts a YAML-unmarshaled numeric value to int. +// goccy/go-yaml unmarshals integers as uint64; standard yaml/v3 uses int. +// float64 is also handled for completeness. +func parseYAMLInt(v any) int { + if v == nil { + return 0 + } + switch n := v.(type) { + case int: + return n + case int64: + return int(n) + case uint64: + return int(n) + case float64: + return int(n) + } + return 0 +} + // parseSerenaTool converts raw serena tool configuration to SerenaToolConfig func parseSerenaTool(val any) *SerenaToolConfig { if val == nil { diff --git a/pkg/workflow/tools_types.go b/pkg/workflow/tools_types.go index fe571c8ff2..db51a105d3 100644 --- a/pkg/workflow/tools_types.go +++ b/pkg/workflow/tools_types.go @@ -73,6 +73,7 @@ type ToolsConfig struct { WebSearch *WebSearchToolConfig `yaml:"web-search,omitempty"` Edit *EditToolConfig `yaml:"edit,omitempty"` Playwright *PlaywrightToolConfig `yaml:"playwright,omitempty"` + Qmd *QmdToolConfig `yaml:"qmd,omitempty"` Serena *SerenaToolConfig `yaml:"serena,omitempty"` AgenticWorkflows *AgenticWorkflowsToolConfig `yaml:"agentic-workflows,omitempty"` CacheMemory *CacheMemoryToolConfig `yaml:"cache-memory,omitempty"` @@ -199,6 +200,9 @@ func (t *ToolsConfig) ToMap() map[string]any { if t.Playwright != nil { result["playwright"] = t.Playwright } + if t.Qmd != nil { + result["qmd"] = t.Qmd + } if t.Serena != nil { // Convert back based on whether it was short 
syntax or object if len(t.Serena.ShortSyntax) > 0 { @@ -323,6 +327,111 @@ type PlaywrightToolConfig struct { Args []string `yaml:"args,omitempty"` } +// QmdDocCollection represents a named documentation collection for the qmd tool. +// Each collection indexes its own set of files and can optionally target a different +// repository via its own checkout configuration. +type QmdDocCollection struct { + // Name is the collection identifier used in the qmd index. + // Defaults to "docs-" when not provided (e.g. "docs-0", "docs-1"). + Name string `yaml:"name,omitempty"` + + // Paths is the list of glob patterns for files to include in this collection. + // Example: ["docs/**/*.md", ".github/**/*.md"] + Paths []string `yaml:"paths"` + + // Context is optional extra context injected into the qmd collection, + // providing the agent with additional hints about the content (e.g. "GitHub Actions documentation"). + Context string `yaml:"context,omitempty"` + + // Checkout configures which repository to check out for this collection. + // Uses the same syntax as the top-level checkout configuration. + // Defaults to the current repository if not set. + Checkout *CheckoutConfig `yaml:"checkout,omitempty"` +} + +// QmdSearchEntry represents a single GitHub search entry whose results are +// downloaded and added to the qmd index as individual files. +type QmdSearchEntry struct { + // Name is an optional name for the resulting qmd collection. + // When empty, the collection is named "search-{index}". + Name string `yaml:"name,omitempty"` + + // Type controls the search backend. Supported values: + // "code" (default) – uses `gh search code` to find repository files + // "issues" – uses `gh issue list` to fetch open issues from + // a repository and save each as a markdown file + // When type is "issues", Query is the repository slug ("owner/repo"). + // If Query is empty for an issue search, ${{ github.repository }} is used. 
+ Type string `yaml:"type,omitempty"` + + // Query is the GitHub code search query string (type "code") or the + // repository slug "owner/repo" (type "issues"). + // Example (code): "repo:owner/repo language:Markdown path:docs/" + // Example (issues): "owner/repo" (or empty to use current repository) + Query string `yaml:"query,omitempty"` + + // Min is the minimum number of results required. If fewer are found + // the activation step fails. + Min int `yaml:"min,omitempty"` + + // Max is the maximum number of results to download. + // Defaults to 30 (type "code") or 500 (type "issues") when not set. + Max int `yaml:"max,omitempty"` + + // GitHubToken overrides the default GITHUB_TOKEN used to authenticate + // the GitHub API request. + // Mutually exclusive with GitHubApp. + GitHubToken string `yaml:"github-token,omitempty"` + + // GitHubApp configures GitHub App-based authentication for the API request. + // Mutually exclusive with GitHubToken. + GitHubApp *GitHubAppConfig `yaml:"github-app,omitempty"` +} + +// QmdToolConfig represents the configuration for the qmd documentation search tool. +// qmd (https://github.com/tobi/qmd) provides local vector search over documentation files. +// The index is built in a dedicated indexing job and shared via GitHub Actions cache, so no +// contents:read permission is needed in the agent job. +// +// Two sources can contribute to the index: +// +// 1. checkouts – glob-based collections from checked-out repositories +// 2. searches – GitHub search queries whose results are downloaded as files +// +// Optionally, the index can be cached in GitHub Actions cache using the cache-key field. +// When cache-key is set without any sources (checkouts/searches), qmd operates in +// read-only mode: it restores the index from cache and skips all indexing steps. +type QmdToolConfig struct { + // Checkouts is the list of named documentation collections. + // Each collection can specify its own checkout to target a different repository. 
+	Checkouts []*QmdDocCollection `yaml:"checkouts,omitempty"`
+
+	// Searches is the list of GitHub search queries whose results are downloaded
+	// and added to the qmd index.
+	Searches []*QmdSearchEntry `yaml:"searches,omitempty"`
+
+	// CacheKey is an optional GitHub Actions cache key used to persist the qmd index
+	// across workflow runs. When set:
+	// - If sources (checkouts/searches) are also configured: the index is built
+	// normally and then saved to the cache. On subsequent runs, the cached index is
+	// restored and the build steps are skipped if the cache hit is exact.
+	// - If no sources are configured (read-only mode): the index is restored directly
+	// from cache without any indexing steps.
+	// Example: "qmd-index-${{ hashFiles('docs/**') }}"
+	CacheKey string `yaml:"cache-key,omitempty"`
+
+	// GPU controls whether node-llama-cpp (used by @tobilu/qmd internally) may use
+	// GPU acceleration. Defaults to false: NODE_LLAMA_CPP_GPU=false is injected into
+	// the indexing step so that GPU auto-detection is skipped on CPU-only runners.
+	// Set to true only when the indexing runner has a GPU.
+	GPU bool `yaml:"gpu,omitempty"`
+
+	// RunsOn overrides the runner image for the indexing job.
+	// When not set, the indexing job defaults to the dedicated indexing runner image
+	// (constants.DefaultQmdIndexingRunnerImage; see buildQmdIndexingJob) — it does NOT
+	// inherit the agent job's runner.
+ // Example: "ubuntu-latest-gpu" or ["self-hosted", "gpu"] + RunsOn string `yaml:"runs-on,omitempty"` +} + // SerenaToolConfig represents the configuration for the Serena MCP tool type SerenaToolConfig struct { Version string `yaml:"version,omitempty"` @@ -436,6 +545,8 @@ func (t *Tools) HasTool(name string) bool { return t.Edit != nil case "playwright": return t.Playwright != nil + case "qmd": + return t.Qmd != nil case "serena": return t.Serena != nil case "agentic-workflows": @@ -481,6 +592,9 @@ func (t *Tools) GetToolNames() []string { if t.Playwright != nil { names = append(names, "playwright") } + if t.Qmd != nil { + names = append(names, "qmd") + } if t.Serena != nil { names = append(names, "serena") } diff --git a/pkg/workflow/unified_prompt_step.go b/pkg/workflow/unified_prompt_step.go index 8075960a4e..e5091012fd 100644 --- a/pkg/workflow/unified_prompt_step.go +++ b/pkg/workflow/unified_prompt_step.go @@ -136,6 +136,15 @@ func (c *Compiler) collectPromptSections(data *WorkflowData) []PromptSection { }) } + // 3a. qmd instructions (if qmd tool is enabled) + if hasQmdTool(data.ParsedTools) { + unifiedPromptLog.Print("Adding qmd section") + sections = append(sections, PromptSection{ + Content: qmdPromptFile, + IsFile: true, + }) + } + // 4. Agentic Workflows MCP guide (if agentic-workflows tool is enabled) if hasAgenticWorkflowsTool(data.ParsedTools) { unifiedPromptLog.Print("Adding agentic-workflows guide section")