From bcfa56f43250cdc40119ea8792667be8818508c7 Mon Sep 17 00:00:00 2001 From: hey-intent Date: Fri, 1 May 2026 17:13:41 +0200 Subject: [PATCH] feat: upgrade issue starting prompt & data refactor: isolation of prompts chore: python 3.12 --- CONTRIBUTING.md | 15 +-- README.md | 103 +++++++++++-------- SECURITY.md | 1 + app/app.py | 19 ++-- app/requirements-dev.txt | 2 +- docs/adr/0001-source-provider-abstraction.md | 10 +- images/worker-aider/Dockerfile | 5 +- images/worker-aider/run.sh | 8 +- images/worker-claude/Dockerfile | 5 +- images/worker-claude/run.sh | 8 +- images/worker-codex/Dockerfile | 5 +- images/worker-codex/run.sh | 8 +- k8s/ai-issue-aider.yaml | 8 +- k8s/ai-issue-claude.yaml | 8 +- k8s/ai-issue-codex.yaml | 8 +- prompt/issue_prompt.sh | 13 +++ prompt/issue_start_prompt.sh | 37 +++++++ providers/aider.sh | 14 ++- providers/claude_code.sh | 14 ++- providers/git_workflow.sh | 8 +- providers/openai.sh | 14 ++- pyproject.toml | 11 ++ requirements-dev.txt | 2 + tests/app/test_webhook.py | 81 +++++++++++++-- tests/conftest.py | 11 ++ 25 files changed, 301 insertions(+), 117 deletions(-) create mode 100644 prompt/issue_prompt.sh create mode 100644 prompt/issue_start_prompt.sh create mode 100644 requirements-dev.txt create mode 100644 tests/conftest.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8a05120..2ebac50 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,7 +79,7 @@ source "$SCRIPT_DIR/git_workflow.sh" git_clone_and_branch # 3. Appeler le CLI IA -myprovider-cli run "Fix issue #${ISSUE_NUMBER}: ${GITHUB_ISSUE_TITLE:-no title}. ..." +myprovider-cli run "Fix issue #${ISSUE_NUMBER}: ${SOURCE_ISSUE_TITLE:-no title}. ..." # 4. Push & PR (logique partagee) git_push_and_pr "Automated PR created by MyProvider for issue #${ISSUE_NUMBER}." 
@@ -96,12 +96,12 @@ set -euo pipefail echo "=== worker start ===" echo "TIME: $(date -u --iso-8601=seconds)" echo "AI_PROVIDER=${AI_PROVIDER:-myprovider}" -echo "GITHUB_REPO=${GITHUB_REPO:-}" -echo "GITHUB_ISSUE_NUMBER=${GITHUB_ISSUE_NUMBER:-}" -echo "GITHUB_INSTALLATION_ID=${GITHUB_INSTALLATION_ID:-}" +echo "SOURCE_REPO=${SOURCE_REPO:-}" +echo "SOURCE_ISSUE_NUMBER=${SOURCE_ISSUE_NUMBER:-}" +echo "SOURCE_INSTALLATION_ID=${SOURCE_INSTALLATION_ID:-}" if [[ "${DEBUG_ENV:-0}" == "1" ]]; then echo "---- env (whitelist) ----" - printenv | grep -E '^(AI_PROVIDER|GITHUB_REPO|GITHUB_ISSUE_NUMBER|GITHUB_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true + printenv | grep -E '^(AI_PROVIDER|SOURCE_REPO|SOURCE_ISSUE_NUMBER|SOURCE_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true echo "---- end env ----" fi @@ -133,8 +133,9 @@ RUN curl -fsSL https://example.com/install.sh | bash WORKDIR /app COPY --chown=worker:worker images/worker-myprovider/run.sh /app/run.sh COPY --chown=worker:worker providers/ /app/providers/ -RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh \ - && chmod +x /app/run.sh /app/providers/*.sh +COPY --chown=worker:worker prompt/ /app/prompt/ +RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh /app/prompt/*.sh \ + && chmod +x /app/run.sh /app/providers/*.sh /app/prompt/*.sh ENV PATH="/home/worker/.local/bin:${PATH}" WORKDIR /work diff --git a/README.md b/README.md index 055cd58..bc187d3 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Kubernetes orchestrator that turns GitHub issues into pull requests using AI age This project automates the **Issue -> Label -> Pull Request** flow: an `ai-pr-*` label on an issue triggers an AI worker that clones the repo, solves the problem, and opens a PR. 
-It avoids AI vendor lock-in with 3 built-in worker providers: +It avoids AI vendor lock-in with 3 built-in worker providers: | Label | Provider | Backend | | -------------- | ----------- | ----------------------- | @@ -14,12 +14,12 @@ It avoids AI vendor lock-in with 3 built-in worker providers: | `ai-pr-codex` | Codex | OpenAI | | `ai-pr-aider` | Aider | OpenRouter (extensible) | -The source hosting layer is abstracted behind `SourceProvider`; GitHub is the -only built-in source provider today. See -`docs/adr/0001-source-provider-abstraction.md` for the design decision. - -The worker architecture is designed to easily add more AI providers (see -`CONTRIBUTING.md`). +The source hosting layer is abstracted behind `SourceProvider`; GitHub is the +only built-in source provider today. See +`docs/adr/0001-source-provider-abstraction.md` for the design decision. + +The worker architecture is designed to easily add more AI providers (see +`CONTRIBUTING.md`). Tested on: VPS / 8 GB RAM / 4 vCPU / k3s single-node. @@ -38,12 +38,12 @@ GitHub Issue (label ai-pr-*) POST /webhook/github | v - +-------------------+ - | Orchestrator | Deployment FastAPI - | app/app.py | - | providers/source | GitHub webhook + clone credentials - +--------+----------+ - | creates a K8s Job based on the AI worker provider + +-------------------+ + | Orchestrator | Deployment FastAPI + | app/app.py | + | providers/source | GitHub webhook + clone credentials + +--------+----------+ + | creates a K8s Job based on the AI worker provider v +----------------+ +----------------+ +----------------+ | worker-claude | | worker-codex | | worker-aider | @@ -54,9 +54,16 @@ GitHub Issue (label ai-pr-*) clone > AI fix > commit > push > PR ``` -**Source auth flow**: `GitHubProvider` generates an ephemeral installation token -(1h) via GitHub App JWT and returns git clone credentials to the orchestrator. -Workers receive only the short-lived token and never receive the PEM key. 
+**Source auth flow**: `GitHubProvider` generates an ephemeral installation token +(1h) via GitHub App JWT and returns git clone credentials to the orchestrator. +Workers receive only the short-lived token and never receive the PEM key. + +**Job environment (metadata)**: the orchestrator injects source-agnostic +variables for each worker Job: `SOURCE_REPO`, `SOURCE_ISSUE_NUMBER`, +`SOURCE_ISSUE_TITLE`, `SOURCE_ISSUE_BODY` (GitHub issue description, truncated to +65,536 characters), `SOURCE_ISSUE_URL`, `SOURCE_EVENT_ACTION`, `SOURCE_INSTALLATION_ID`. +See `docs/adr/0001-source-provider-abstraction.md`. The clone credential secret +key remains `GITHUB_TOKEN` (installation token). --- @@ -65,7 +72,7 @@ Workers receive only the short-lived token and never receive the PEM key. ### 1. Prerequisites - A VPS (or machine) with 4 vCPU / 8 GB RAM minimum -- API keys for your desired AI worker providers +- API keys for your desired AI worker providers - **Ansible option**: `ansible` installed locally + SSH root access to the VPS - **Manual option**: k3s, Docker, and `kubectl` installed on the VPS @@ -230,6 +237,10 @@ kubectl -n ai-bot delete job debug- --ignore-not-found && kubectl -n a ### Manual Jobs (ai-issue) +> Manual jobs in `k8s/ai-issue-*.yaml` must set `SOURCE_REPO`, +> `SOURCE_ISSUE_NUMBER`, and `SOURCE_INSTALLATION_ID` (not the old `GITHUB_*` +> names). 
+ ```shell # Run / logs / rerun (replace ) kubectl -n ai-bot apply -f k8s/ai-issue-.yaml @@ -259,24 +270,24 @@ curl -s -X POST http://127.0.0.1:8080/jobs/run -H "Authorization: Bearer 30 min (token expires at 1h) -- Regularly rotate `WEBHOOK_SECRET` and `ADMIN_TOKEN` -- Review any new `SourceProvider` for webhook verification, credential scope, - and logging behavior -- See `SECURITY.md` for vulnerability reporting and provider security rules +- Restrict RBAC access to Secrets and Jobs +- Monitor jobs > 30 min (token expires at 1h) +- Regularly rotate `WEBHOOK_SECRET` and `ADMIN_TOKEN` +- Review any new `SourceProvider` for webhook verification, credential scope, + and logging behavior +- See `SECURITY.md` for vulnerability reporting and provider security rules --- @@ -288,6 +299,7 @@ curl -s -X POST http://127.0.0.1:8080/jobs/run -H "Authorization: Bearer --previous` | | `Not logged in` | Missing API secret (depends on provider) | | `Pods Pending` | `kubectl describe pod ` | +| `Job missing SOURCE_REPO` / clone fails | Orchestrator + worker images out of sync; manual YAML still using `GITHUB_REPO` / `GITHUB_ISSUE_*` — use `SOURCE_*` envs | | Job 409 conflict | Job already exists, `kubectl delete job ` | ```shell @@ -302,10 +314,10 @@ sudo systemctl status k3s --no-pager -l ```text . 
-|-- app/ -| |-- app.py # FastAPI Orchestrator -| |-- config.py # Runtime env/config -| `-- requirements.txt +|-- app/ +| |-- app.py # FastAPI Orchestrator +| |-- config.py # Runtime env/config +| `-- requirements.txt |-- images/ | |-- orchestrator/Dockerfile | |-- worker-claude/ # Dockerfile + run.sh @@ -318,12 +330,15 @@ sudo systemctl status k3s --no-pager -l | |-- ai-issue-*.yaml # Manual jobs per provider | |-- debug-*.yaml # Debug jobs per provider | `-- secrets/ # Templates (no values) -|-- providers/ -| |-- source/ # SourceProvider interface + GitHub implementation -| |-- git_workflow.sh # Shared Git logic -| |-- claude_code.sh -| |-- openai.sh -| `-- aider.sh +|-- prompt/ +| |-- issue_prompt.sh # Optional SOURCE_ISSUE_BODY appendix +| `-- issue_start_prompt.sh # Shared task instructions (all workers) +|-- providers/ +| |-- source/ # SourceProvider interface + GitHub implementation +| |-- git_workflow.sh # Shared Git logic +| |-- claude_code.sh +| |-- openai.sh +| `-- aider.sh |-- ansible/ | |-- playbook.yml # Full VPS deployment | |-- inventory.ini @@ -331,9 +346,9 @@ sudo systemctl status k3s --no-pager -l | |-- inventory-prod.ini # gitignored | |-- requirements.yml # Ansible collections | `-- group_vars/vps.yml -|-- docs/ -| |-- adr/ # Architecture decision records -| `-- workspace.dsl # C4 architecture (Structurizr) +|-- docs/ +| |-- adr/ # Architecture decision records +| `-- workspace.dsl # C4 architecture (Structurizr) |-- .github/ | `-- workflows/secret-scan.yml # CI secret scanning |-- CONTRIBUTING.md diff --git a/SECURITY.md b/SECURITY.md index 3a790ee..ba2b403 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -25,6 +25,7 @@ If your report contains secrets, rotate them immediately after sharing. - Never commit real secret values to git history. - Kubernetes secret manifests under `k8s/secrets/` are templates only. 
- Webhook fixture files under `tests/` must be anonymized and must not contain real repository names, users, tokens, signatures, private issue content, or internal URLs. +- **Worker Jobs** receive issue metadata in environment variables (`SOURCE_ISSUE_TITLE`, `SOURCE_ISSUE_BODY`, etc.). Issue bodies are **untrusted user content**; they are included in AI prompts after an orchestrator-side size cap. Operators should assume **prompt-injection** risk from issue text (same class of risk as pasting issue content into any LLM). Do not log full `SOURCE_ISSUE_BODY` in production. ## Source Provider Security diff --git a/app/app.py b/app/app.py index 5ff971a..45b2fe2 100644 --- a/app/app.py +++ b/app/app.py @@ -29,6 +29,9 @@ get_provider, ) +# Issue body in a pod env var must stay bounded (etcd / API limits, huge GitHub bodies). +_MAX_ISSUE_BODY_CHARS = 65536 + # --- Logging setup --- # Use uvicorn's logger so messages aren't disabled by uvicorn's dictConfig logger = logging.getLogger("uvicorn.error") @@ -322,6 +325,8 @@ async def github_webhook(request: Request): issue = payload.get("issue") or {} issue_number = issue.get("number") issue_title = issue.get("title", "")[:200] + raw_issue_body = issue.get("body") + issue_body = (raw_issue_body if isinstance(raw_issue_body, str) else "")[:_MAX_ISSUE_BODY_CHARS] issue_url = issue.get("html_url", "") installation_id = (payload.get("installation") or {}).get("id") @@ -356,12 +361,14 @@ async def github_webhook(request: Request): cfg=cfg, provider=provider, env_vars={ - "GITHUB_REPO": repo_full, - "GITHUB_ISSUE_NUMBER": str(issue_number), - "GITHUB_EVENT_ACTION": str(action), - "GITHUB_ISSUE_TITLE": issue_title, - "GITHUB_ISSUE_URL": issue_url, - "GITHUB_INSTALLATION_ID": str(installation_id), + # Source-provider-agnostic metadata (see docs/adr/0001-source-provider-abstraction.md). 
+ "SOURCE_REPO": repo_full, + "SOURCE_ISSUE_NUMBER": str(issue_number), + "SOURCE_EVENT_ACTION": str(action), + "SOURCE_ISSUE_TITLE": issue_title, + "SOURCE_ISSUE_BODY": issue_body, + "SOURCE_ISSUE_URL": issue_url, + "SOURCE_INSTALLATION_ID": str(installation_id), }, github_token_secret_name=token_secret_name, ) diff --git a/app/requirements-dev.txt b/app/requirements-dev.txt index b420d48..82ebd9b 100644 --- a/app/requirements-dev.txt +++ b/app/requirements-dev.txt @@ -1,5 +1,5 @@ -r requirements.txt -pytest>=8.0,<9.0 +pytest>=8.0 pytest-asyncio>=0.23,<1.0 respx>=0.21,<1.0 ruff>=0.8,<1.0 diff --git a/docs/adr/0001-source-provider-abstraction.md b/docs/adr/0001-source-provider-abstraction.md index a31d62e..8a029d6 100644 --- a/docs/adr/0001-source-provider-abstraction.md +++ b/docs/adr/0001-source-provider-abstraction.md @@ -48,7 +48,6 @@ For GitHub, this means: ## Non-Goals - No GitLab, Gitea, Forgejo, or Linear implementation in this change. -- No worker environment variable migration yet. - No label state machine changes. - No change to the existing `ai-pr-*` trigger behavior. - No replacement of Kubernetes job orchestration. @@ -70,8 +69,13 @@ Tradeoffs: - Some source platforms cannot always provide short-lived repo-scoped clone credentials. Their implementations must document the best available security model. -- The workers still receive GitHub-shaped environment variables in this PR. - Generic `SOURCE_*` variables should be handled in a later migration. + +Worker jobs receive **source-agnostic metadata** as `SOURCE_*` environment +variables (for example `SOURCE_REPO`, `SOURCE_ISSUE_NUMBER`, `SOURCE_ISSUE_BODY`, +`SOURCE_ISSUE_URL`, `SOURCE_INSTALLATION_ID`, `SOURCE_EVENT_ACTION`) populated +from the active `SourceProvider`. The HTTPS clone credential still uses the +secret key **`GITHUB_TOKEN`** today (GitHub App installation token). Renaming +that credential for non-GitHub hosts is a separate change. 
## Validation diff --git a/images/worker-aider/Dockerfile b/images/worker-aider/Dockerfile index bf552e9..e8b02b2 100644 --- a/images/worker-aider/Dockerfile +++ b/images/worker-aider/Dockerfile @@ -18,8 +18,9 @@ RUN pip install --user aider-chat WORKDIR /app COPY --chown=worker:worker images/worker-aider/run.sh /app/run.sh COPY --chown=worker:worker providers/ /app/providers/ -RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh \ - && chmod +x /app/run.sh /app/providers/*.sh +COPY --chown=worker:worker prompt/ /app/prompt/ +RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh /app/prompt/*.sh \ + && chmod +x /app/run.sh /app/providers/*.sh /app/prompt/*.sh ENV PATH="/home/worker/.local/bin:${PATH}" WORKDIR /work diff --git a/images/worker-aider/run.sh b/images/worker-aider/run.sh index bfba814..54519ea 100644 --- a/images/worker-aider/run.sh +++ b/images/worker-aider/run.sh @@ -4,12 +4,12 @@ set -euo pipefail echo "=== worker start ===" echo "TIME: $(date -u --iso-8601=seconds)" echo "AI_PROVIDER=${AI_PROVIDER:-aider}" -echo "GITHUB_REPO=${GITHUB_REPO:-}" -echo "GITHUB_ISSUE_NUMBER=${GITHUB_ISSUE_NUMBER:-}" -echo "GITHUB_INSTALLATION_ID=${GITHUB_INSTALLATION_ID:-}" +echo "SOURCE_REPO=${SOURCE_REPO:-}" +echo "SOURCE_ISSUE_NUMBER=${SOURCE_ISSUE_NUMBER:-}" +echo "SOURCE_INSTALLATION_ID=${SOURCE_INSTALLATION_ID:-}" if [[ "${DEBUG_ENV:-0}" == "1" ]]; then echo "---- env (whitelist) ----" - printenv | grep -E '^(AI_PROVIDER|GITHUB_REPO|GITHUB_ISSUE_NUMBER|GITHUB_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true + printenv | grep -E '^(AI_PROVIDER|SOURCE_REPO|SOURCE_ISSUE_NUMBER|SOURCE_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true echo "---- end env ----" fi diff --git a/images/worker-claude/Dockerfile b/images/worker-claude/Dockerfile index f5107ee..2b182a2 100644 --- a/images/worker-claude/Dockerfile +++ b/images/worker-claude/Dockerfile @@ -18,8 +18,9 @@ RUN curl -fsSL https://claude.ai/install.sh | bash WORKDIR /app COPY --chown=worker:worker 
images/worker-claude/run.sh /app/run.sh COPY --chown=worker:worker providers/ /app/providers/ -RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh \ - && chmod +x /app/run.sh /app/providers/*.sh +COPY --chown=worker:worker prompt/ /app/prompt/ +RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh /app/prompt/*.sh \ + && chmod +x /app/run.sh /app/providers/*.sh /app/prompt/*.sh ENV PATH="/home/worker/.local/bin:${PATH}" WORKDIR /work diff --git a/images/worker-claude/run.sh b/images/worker-claude/run.sh index 05e63ba..b96b18c 100644 --- a/images/worker-claude/run.sh +++ b/images/worker-claude/run.sh @@ -4,12 +4,12 @@ set -euo pipefail echo "=== worker start ===" echo "TIME: $(date -u --iso-8601=seconds)" echo "AI_PROVIDER=${AI_PROVIDER:-claude_code}" -echo "GITHUB_REPO=${GITHUB_REPO:-}" -echo "GITHUB_ISSUE_NUMBER=${GITHUB_ISSUE_NUMBER:-}" -echo "GITHUB_INSTALLATION_ID=${GITHUB_INSTALLATION_ID:-}" +echo "SOURCE_REPO=${SOURCE_REPO:-}" +echo "SOURCE_ISSUE_NUMBER=${SOURCE_ISSUE_NUMBER:-}" +echo "SOURCE_INSTALLATION_ID=${SOURCE_INSTALLATION_ID:-}" if [[ "${DEBUG_ENV:-0}" == "1" ]]; then echo "---- env (whitelist) ----" - printenv | grep -E '^(AI_PROVIDER|GITHUB_REPO|GITHUB_ISSUE_NUMBER|GITHUB_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true + printenv | grep -E '^(AI_PROVIDER|SOURCE_REPO|SOURCE_ISSUE_NUMBER|SOURCE_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true echo "---- end env ----" fi diff --git a/images/worker-codex/Dockerfile b/images/worker-codex/Dockerfile index 2e9a539..7ac0a7a 100644 --- a/images/worker-codex/Dockerfile +++ b/images/worker-codex/Dockerfile @@ -25,8 +25,9 @@ RUN git config --global user.name "patchwork-agent" \ WORKDIR /app COPY --chown=worker:worker images/worker-codex/run.sh /app/run.sh COPY --chown=worker:worker providers/ /app/providers/ -RUN sed -i 's/\r$//' /app/run.sh /app/providers/*.sh \ - && chmod +x /app/run.sh /app/providers/*.sh +COPY --chown=worker:worker prompt/ /app/prompt/ +RUN sed -i 's/\r$//' /app/run.sh 
/app/providers/*.sh /app/prompt/*.sh \ + && chmod +x /app/run.sh /app/providers/*.sh /app/prompt/*.sh WORKDIR /work diff --git a/images/worker-codex/run.sh b/images/worker-codex/run.sh index 86fcfd9..047eae4 100644 --- a/images/worker-codex/run.sh +++ b/images/worker-codex/run.sh @@ -4,12 +4,12 @@ set -euo pipefail echo "=== worker start ===" echo "TIME: $(date -u --iso-8601=seconds)" echo "AI_PROVIDER=${AI_PROVIDER:-openai}" -echo "GITHUB_REPO=${GITHUB_REPO:-}" -echo "GITHUB_ISSUE_NUMBER=${GITHUB_ISSUE_NUMBER:-}" -echo "GITHUB_INSTALLATION_ID=${GITHUB_INSTALLATION_ID:-}" +echo "SOURCE_REPO=${SOURCE_REPO:-}" +echo "SOURCE_ISSUE_NUMBER=${SOURCE_ISSUE_NUMBER:-}" +echo "SOURCE_INSTALLATION_ID=${SOURCE_INSTALLATION_ID:-}" if [[ "${DEBUG_ENV:-0}" == "1" ]]; then echo "---- env (whitelist) ----" - printenv | grep -E '^(AI_PROVIDER|GITHUB_REPO|GITHUB_ISSUE_NUMBER|GITHUB_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true + printenv | grep -E '^(AI_PROVIDER|SOURCE_REPO|SOURCE_ISSUE_NUMBER|SOURCE_INSTALLATION_ID|NAMESPACE|JOB_IMAGE|HOME|PATH)=' || true echo "---- end env ----" fi diff --git a/k8s/ai-issue-aider.yaml b/k8s/ai-issue-aider.yaml index be1d5d5..6f3e97f 100644 --- a/k8s/ai-issue-aider.yaml +++ b/k8s/ai-issue-aider.yaml @@ -25,10 +25,10 @@ spec: # Ephemeral installation token (generate via GitHub App or use a PAT) - name: GITHUB_TOKEN value: "CHANGE_ME" - # --- Fill in before running --- - - name: GITHUB_REPO + # --- Fill in before running (SOURCE_* = provider-agnostic metadata) --- + - name: SOURCE_REPO value: "CHANGE_ME" - - name: GITHUB_ISSUE_NUMBER + - name: SOURCE_ISSUE_NUMBER value: "CHANGE_ME" - - name: GITHUB_INSTALLATION_ID + - name: SOURCE_INSTALLATION_ID value: "CHANGE_ME" diff --git a/k8s/ai-issue-claude.yaml b/k8s/ai-issue-claude.yaml index a31f79f..53ae2f7 100644 --- a/k8s/ai-issue-claude.yaml +++ b/k8s/ai-issue-claude.yaml @@ -23,10 +23,10 @@ spec: # Ephemeral installation token (generate via GitHub App or use a PAT) - name: GITHUB_TOKEN value: 
"CHANGE_ME" - # --- Fill in before running --- - - name: GITHUB_REPO + # --- Fill in before running (SOURCE_* = provider-agnostic metadata) --- + - name: SOURCE_REPO value: "CHANGE_ME" - - name: GITHUB_ISSUE_NUMBER + - name: SOURCE_ISSUE_NUMBER value: "CHANGE_ME" - - name: GITHUB_INSTALLATION_ID + - name: SOURCE_INSTALLATION_ID value: "CHANGE_ME" diff --git a/k8s/ai-issue-codex.yaml b/k8s/ai-issue-codex.yaml index b941d56..23a6ef0 100644 --- a/k8s/ai-issue-codex.yaml +++ b/k8s/ai-issue-codex.yaml @@ -23,10 +23,10 @@ spec: # Ephemeral installation token (generate via GitHub App or use a PAT) - name: GITHUB_TOKEN value: "CHANGE_ME" - # --- Fill in before running --- - - name: GITHUB_REPO + # --- Fill in before running (SOURCE_* = provider-agnostic metadata) --- + - name: SOURCE_REPO value: "CHANGE_ME" - - name: GITHUB_ISSUE_NUMBER + - name: SOURCE_ISSUE_NUMBER value: "CHANGE_ME" - - name: GITHUB_INSTALLATION_ID + - name: SOURCE_INSTALLATION_ID value: "CHANGE_ME" diff --git a/prompt/issue_prompt.sh b/prompt/issue_prompt.sh new file mode 100644 index 0000000..1767358 --- /dev/null +++ b/prompt/issue_prompt.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# issue_prompt.sh — append optional SOURCE_ISSUE_BODY to a base worker prompt. +# Sourced from providers/*.sh via PROMPT_DIR (repo: prompt/ ; image: /app/prompt/). +# Uses: SOURCE_ISSUE_BODY (optional), set by orchestrator from SourceProvider. + +issue_append_issue_body() { + local base="$1" + if [ -n "${SOURCE_ISSUE_BODY:-}" ]; then + printf '%s\n\nIssue description:\n%s' "$base" "${SOURCE_ISSUE_BODY}" + else + printf '%s' "$base" + fi +} diff --git a/prompt/issue_start_prompt.sh b/prompt/issue_start_prompt.sh new file mode 100644 index 0000000..48a1671 --- /dev/null +++ b/prompt/issue_start_prompt.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# issue_start_prompt.sh — opening task instructions for all AI worker CLIs. +# Requires: ISSUE_NUMBER, SOURCE_ISSUE_TITLE (env; set by git_workflow / orchestrator). 
+ +issue_start_prompt() { + cat < dict[str, str]: + _ns, job = batch.created_jobs[0] + container = job.spec.template.spec.containers[0] + return {e.name: e.value for e in container.env if getattr(e, "value", None) is not None} + + +async def _post_labeled_claude_webhook(monkeypatch, payload: dict): + """Simulate validated webhook: issues event, ai-pr-claude label, fake k8s.""" + event = WebhookEvent( + type="issue_labeled", + actor="bob", + repo=payload["repository"]["full_name"], + label="ai-pr-claude", + raw=payload, + ) + batch = FakeBatch() + core = FakeCore() + monkeypatch.setattr(orchestrator, "get_source_provider", lambda: FakeSourceProvider(event=event)) + monkeypatch.setattr(orchestrator, "load_k8s_client", lambda: (batch, core)) + response = await post_webhook( + json.dumps(payload).encode("utf-8"), + {"X-GitHub-Event": "issues"}, + ) + return response, batch, core + + @pytest.mark.asyncio async def test_github_webhook_rejects_invalid_signature(monkeypatch): monkeypatch.setattr(orchestrator, "get_source_provider", lambda: FakeSourceProvider(verify=False)) @@ -81,16 +108,7 @@ async def test_github_webhook_ignores_non_issue_event(monkeypatch): @pytest.mark.asyncio async def test_github_webhook_triggers_worker_job(monkeypatch): payload = load_payload("issue_labeled.json") - event = WebhookEvent(type="issue_labeled", actor="bob", repo="acme/widgets", label="ai-pr-claude", raw=payload) - batch = FakeBatch() - core = FakeCore() - monkeypatch.setattr(orchestrator, "get_source_provider", lambda: FakeSourceProvider(event=event)) - monkeypatch.setattr(orchestrator, "load_k8s_client", lambda: (batch, core)) - - response = await post_webhook( - json.dumps(payload).encode("utf-8"), - {"X-GitHub-Event": "issues"}, - ) + response, batch, core = await _post_labeled_claude_webhook(monkeypatch, payload) assert response.status_code == 200 data = response.json() @@ -102,3 +120,46 @@ async def test_github_webhook_triggers_worker_job(monkeypatch): assert len(core.secrets) == 1 
assert core.secrets[0][1].string_data == {"GITHUB_TOKEN": "installation-token"} assert len(batch.created_jobs) == 1 + env_plain = _plain_env_from_job(batch) + assert env_plain["SOURCE_ISSUE_BODY"] == payload["issue"]["body"] + + +@pytest.mark.asyncio +async def test_github_webhook_issue_body_truncated(monkeypatch): + payload = copy.deepcopy(load_payload("issue_labeled.json")) + cap = orchestrator._MAX_ISSUE_BODY_CHARS + payload["issue"]["body"] = "Z" * (cap + 500) + + response, batch, _core = await _post_labeled_claude_webhook(monkeypatch, payload) + + assert response.status_code == 200 + assert response.json()["triggered"] is True + env_plain = _plain_env_from_job(batch) + assert len(env_plain["SOURCE_ISSUE_BODY"]) == cap + assert env_plain["SOURCE_ISSUE_BODY"] == "Z" * cap + + +@pytest.mark.asyncio +async def test_github_webhook_issue_body_missing(monkeypatch): + payload = copy.deepcopy(load_payload("issue_labeled.json")) + del payload["issue"]["body"] + + response, batch, _core = await _post_labeled_claude_webhook(monkeypatch, payload) + + assert response.status_code == 200 + assert response.json()["triggered"] is True + env_plain = _plain_env_from_job(batch) + assert env_plain["SOURCE_ISSUE_BODY"] == "" + + +@pytest.mark.asyncio +async def test_github_webhook_issue_body_non_string(monkeypatch): + payload = copy.deepcopy(load_payload("issue_labeled.json")) + payload["issue"]["body"] = ["unexpected", "list"] + + response, batch, _core = await _post_labeled_claude_webhook(monkeypatch, payload) + + assert response.status_code == 200 + assert response.json()["triggered"] is True + env_plain = _plain_env_from_job(batch) + assert env_plain["SOURCE_ISSUE_BODY"] == "" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6d8b695 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,11 @@ +"""Pytest bootstrap: repo root must be on sys.path for `app` and `providers`.""" + +from __future__ import annotations + +import sys +from pathlib import 
Path + +_ROOT = Path(__file__).resolve().parent.parent +_root_s = str(_ROOT) +if _root_s not in sys.path: + sys.path.insert(0, _root_s)