diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml index 2ed9b2c..cb35c5d 100644 --- a/.github/workflows/secret-scan.yml +++ b/.github/workflows/secret-scan.yml @@ -14,6 +14,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Run gitleaks uses: gitleaks/gitleaks-action@v2 env: diff --git a/README.md b/README.md index 0592dad..055cd58 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Kubernetes orchestrator that turns GitHub issues into pull requests using AI age This project automates the **Issue -> Label -> Pull Request** flow: an `ai-pr-*` label on an issue triggers an AI worker that clones the repo, solves the problem, and opens a PR. -It avoids vendor lock-in with 3 built-in providers: +It avoids AI vendor lock-in with 3 built-in worker providers: | Label | Provider | Backend | | -------------- | ----------- | ----------------------- | @@ -14,7 +14,12 @@ It avoids vendor lock-in with 3 built-in providers: | `ai-pr-codex` | Codex | OpenAI | | `ai-pr-aider` | Aider | OpenRouter (extensible) | -The architecture is designed to easily add more providers (see `CONTRIBUTING.md`). +The source hosting layer is abstracted behind `SourceProvider`; GitHub is the +only built-in source provider today. See +`docs/adr/0001-source-provider-abstraction.md` for the design decision. + +The worker architecture is designed to easily add more AI providers (see +`CONTRIBUTING.md`). Tested on: VPS / 8 GB RAM / 4 vCPU / k3s single-node. 
@@ -33,11 +38,12 @@ GitHub Issue (label ai-pr-*) POST /webhook/github | v - +-------------------+ - | Orchestrator | Deployment FastAPI - | app/app.py | - +--------+----------+ - | creates a K8s Job based on the provider + +-------------------+ + | Orchestrator | Deployment FastAPI + | app/app.py | + | providers/source | GitHub webhook + clone credentials + +--------+----------+ + | creates a K8s Job based on the AI worker provider v +----------------+ +----------------+ +----------------+ | worker-claude | | worker-codex | | worker-aider | @@ -48,7 +54,9 @@ GitHub Issue (label ai-pr-*) clone > AI fix > commit > push > PR ``` -**GitHub auth flow**: the orchestrator generates an ephemeral installation token (1h) via GitHub App JWT. Workers never receive the PEM key. +**Source auth flow**: `GitHubProvider` generates an ephemeral installation token +(1h) via GitHub App JWT and returns git clone credentials to the orchestrator. +Workers receive only the short-lived token and never receive the PEM key. --- @@ -57,7 +65,7 @@ GitHub Issue (label ai-pr-*) ### 1. 
Prerequisites - A VPS (or machine) with 4 vCPU / 8 GB RAM minimum -- API keys for your desired providers +- API keys for your desired AI worker providers - **Ansible option**: `ansible` installed locally + SSH root access to the VPS - **Manual option**: k3s, Docker, and `kubectl` installed on the VPS @@ -251,22 +259,24 @@ curl -s -X POST http://127.0.0.1:8080/jobs/run -H "Authorization: Bearer 30 min (token expires at 1h) -- Regularly rotate `WEBHOOK_SECRET` and `ADMIN_TOKEN` -- See `SECURITY.md` for vulnerability reporting +- Restrict RBAC access to Secrets and Jobs +- Monitor jobs > 30 min (token expires at 1h) +- Regularly rotate `WEBHOOK_SECRET` and `ADMIN_TOKEN` +- Review any new `SourceProvider` for webhook verification, credential scope, + and logging behavior +- See `SECURITY.md` for vulnerability reporting and provider security rules --- @@ -292,9 +302,10 @@ sudo systemctl status k3s --no-pager -l ```text . -|-- app/ -| |-- app.py # FastAPI Orchestrator -| `-- requirements.txt +|-- app/ +| |-- app.py # FastAPI Orchestrator +| |-- config.py # Runtime env/config +| `-- requirements.txt |-- images/ | |-- orchestrator/Dockerfile | |-- worker-claude/ # Dockerfile + run.sh @@ -307,11 +318,12 @@ sudo systemctl status k3s --no-pager -l | |-- ai-issue-*.yaml # Manual jobs per provider | |-- debug-*.yaml # Debug jobs per provider | `-- secrets/ # Templates (no values) -|-- providers/ -| |-- git_workflow.sh # Shared Git logic -| |-- claude_code.sh -| |-- openai.sh -| `-- aider.sh +|-- providers/ +| |-- source/ # SourceProvider interface + GitHub implementation +| |-- git_workflow.sh # Shared Git logic +| |-- claude_code.sh +| |-- openai.sh +| `-- aider.sh |-- ansible/ | |-- playbook.yml # Full VPS deployment | |-- inventory.ini @@ -319,9 +331,9 @@ sudo systemctl status k3s --no-pager -l | |-- inventory-prod.ini # gitignored | |-- requirements.yml # Ansible collections | `-- group_vars/vps.yml -|-- docs/ -| |-- catalog-info.yaml # Backstage service catalog -| `-- 
workspace.dsl # C4 architecture (Structurizr) +|-- docs/ +| |-- adr/ # Architecture decision records +| `-- workspace.dsl # C4 architecture (Structurizr) |-- .github/ | `-- workflows/secret-scan.yml # CI secret scanning |-- CONTRIBUTING.md diff --git a/SECURITY.md b/SECURITY.md index f8c35ef..3a790ee 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -19,9 +19,30 @@ If your report contains secrets, rotate them immediately after sharing. ## Scope Notes -- This project handles sensitive material (GitHub App private key, API keys, webhook secret, admin token). +- This project handles sensitive material: source-provider credentials, AI API keys, webhook secrets, admin tokens, and short-lived git clone tokens. +- GitHub is currently the only built-in source provider. Its App private key must stay in the orchestrator pod only. +- Workers must receive only short-lived source credentials, never long-lived source-provider private keys. - Never commit real secret values to git history. - Kubernetes secret manifests under `k8s/secrets/` are templates only. +- Webhook fixture files under `tests/` must be anonymized and must not contain real repository names, users, tokens, signatures, private issue content, or internal URLs. + +## Source Provider Security + +Source providers live under `providers/source/` and own provider-specific webhook +verification, event parsing, API calls, and git clone credentials. + +Provider implementations must: + +- Verify webhook authenticity before parsing or acting on the payload. +- Treat webhook bodies, `raw` payloads, comments, issue bodies, and PR bodies as untrusted input. +- Return the shortest-lived and narrowest-scoped git credentials available from `get_clone_credentials(repo)`. +- Avoid logging tokens, webhook signatures, private keys, issue bodies, comments, or full raw payloads. +- Keep provider-specific secrets in the orchestrator, not in worker images or manifests. 
+- Document any provider that cannot issue short-lived repo-scoped credentials. + +For GitHub, `get_clone_credentials(repo)` uses a GitHub App installation token. +Workers receive that token through an ephemeral Kubernetes Secret and do not +receive the GitHub App PEM key. ## Hardening Expectations @@ -29,3 +50,7 @@ If your report contains secrets, rotate them immediately after sharing. - Keep admin endpoints (`/secrets/github-app`, `/jobs/run`) private. - Use strong random `ADMIN_TOKEN` values and rotate credentials regularly. - Run workers only in isolated, ephemeral environments. +- Keep worker ServiceAccount permissions minimal. Workers should not need access to Kubernetes Secrets. +- Prefer a secrets operator such as Sealed Secrets or External Secrets for production deployments. +- Rotate webhook secrets and source-provider credentials after suspected exposure. +- Review new source-provider implementations for webhook verification, credential lifetime, token scope, and logging behavior before enabling them. diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ + diff --git a/app/app.py b/app/app.py index fadc58d..5ff971a 100644 --- a/app/app.py +++ b/app/app.py @@ -1,508 +1,490 @@ -#!/usr/bin/env python3 -""" -app.py - FastAPI service to manage a GitHub App secret and create Kubernetes Jobs. -... 
-(la même docstring) -""" - -from __future__ import annotations -import os -import re -import base64 -import typing as t -from dataclasses import dataclass -from datetime import datetime, timezone -import uuid -import logging -import secrets -import time - -from fastapi import FastAPI, HTTPException, Body, Request, Depends -from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials -from pydantic import BaseModel, Field -from kubernetes import client, config -from kubernetes.client.rest import ApiException -import hmac -import hashlib -import jwt -import httpx - -# --- Config / env --- -NAMESPACE = os.getenv("NAMESPACE", "ai-bot") -JOB_TTL_SECONDS = int(os.getenv("JOB_TTL_SECONDS", "3600")) -TRIGGER_PREFIX = os.getenv("TRIGGER_PREFIX", "ai-pr-") -WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "") # required in prod -ADMIN_TOKEN = os.getenv("ADMIN_TOKEN", "") # bearer token for admin endpoints -ENABLE_K8S_DEBUG = os.getenv("ENABLE_K8S_DEBUG", "false").lower() in ("1", "true", "yes") - -# Worker images (configurable via env) -CLAUDE_WORKER_IMAGE = os.getenv("CLAUDE_WORKER_IMAGE", "worker-claude:latest") -CODEX_WORKER_IMAGE = os.getenv("CODEX_WORKER_IMAGE", "worker-codex:latest") -AIDER_WORKER_IMAGE = os.getenv("AIDER_WORKER_IMAGE", "worker-aider:latest") - -# GitHub App credentials (used to generate ephemeral installation tokens) -GITHUB_APP_ID = os.getenv("GITHUB_APP_ID", "") -GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY", "") - - -@dataclass(frozen=True) -class ProviderSecretRef: - env_name: str - secret_name: str - secret_key: str - - -@dataclass(frozen=True) -class ProviderConfig: - image: str - ai_provider: str - api_secret: ProviderSecretRef - extra_env: tuple[tuple[str, str], ...] = () - extra_secrets: tuple[ProviderSecretRef, ...] 
= () - - -# Per-provider config: image and API key secret -PROVIDER_CONFIG: dict[str, ProviderConfig] = { - "claude": ProviderConfig( - image=CLAUDE_WORKER_IMAGE, - ai_provider="claude_code", - api_secret=ProviderSecretRef("ANTHROPIC_API_KEY", "anthropic-api-key", "ANTHROPIC_API_KEY"), - ), - "codex": ProviderConfig( - image=CODEX_WORKER_IMAGE, - ai_provider="openai", - api_secret=ProviderSecretRef("OPENAI_API_KEY", "openai-api-key", "OPENAI_API_KEY"), - ), - "aider": ProviderConfig( - image=AIDER_WORKER_IMAGE, - ai_provider="aider", - api_secret=ProviderSecretRef("OPENROUTER_API_KEY", "openrouter-api-key", "OPENROUTER_API_KEY"), - ), -} - -# --- Logging setup --- -# Use uvicorn's logger so messages aren't disabled by uvicorn's dictConfig -logger = logging.getLogger("uvicorn.error") - -if ENABLE_K8S_DEBUG: - # debug du client k8s / urllib3 (affiche les requêtes HTTP vers l'API server) - logging.getLogger("kubernetes").setLevel(logging.DEBUG) - logging.getLogger("urllib3").setLevel(logging.DEBUG) - logger.warning("Kubernetes client debug ENABLED (ENABLE_K8S_DEBUG=true) - do NOT enable in production if logs leak secrets") - -app = FastAPI(title="orchestrator", version="1.0") -bearer_scheme = HTTPBearer(auto_error=False) - - -def verify_admin_token(credentials: HTTPAuthorizationCredentials | None = Depends(bearer_scheme)): - """Require a valid bearer token for admin endpoints.""" - if not ADMIN_TOKEN: - raise HTTPException(status_code=503, detail="ADMIN_TOKEN not configured") - if not credentials or not secrets.compare_digest(credentials.credentials, ADMIN_TOKEN): - raise HTTPException(status_code=401, detail="invalid or missing bearer token") - return True - -def load_k8s_client(): - """ - Load kube config: prefer in-cluster, fallback to KUBECONFIG if set, else raise. - Returns BatchV1Api and CoreV1Api clients. 
- """ - try: - config.load_incluster_config() - except Exception: - kubeconfig = os.getenv("KUBECONFIG") - if kubeconfig: - config.load_kube_config(config_file=kubeconfig) - else: - # last resort: try default kube config - config.load_kube_config() - return client.BatchV1Api(), client.CoreV1Api() - -def safe_name(s: str) -> str: - s2 = re.sub(r"[^a-z0-9-]+", "-", s.lower()).strip("-") - return (s2[:50] or "job") - -async def _generate_installation_token(installation_id: str) -> str: - """Generate an ephemeral GitHub installation token (1h) from App credentials.""" - if not GITHUB_APP_ID or not GITHUB_PRIVATE_KEY: - raise HTTPException(status_code=500, detail="GITHUB_APP_ID and GITHUB_PRIVATE_KEY must be configured") - - now = int(time.time()) - payload = { - "iat": now - 60, - "exp": now + 600, - "iss": GITHUB_APP_ID, - } - encoded_jwt = jwt.encode(payload, GITHUB_PRIVATE_KEY, algorithm="RS256") - - url = f"https://api.github.com/app/installations/{installation_id}/access_tokens" - headers = { - "Authorization": f"Bearer {encoded_jwt}", - "Accept": "application/vnd.github+json", - } - - async with httpx.AsyncClient() as http_client: - resp = await http_client.post(url, headers=headers) - - if resp.status_code != 201: - logger.error("GitHub token exchange failed: status=%s body=%s", resp.status_code, resp.text[:500]) - raise HTTPException(status_code=500, detail=f"GitHub installation token exchange failed ({resp.status_code})") - - token = resp.json().get("token") - if not token: - raise HTTPException(status_code=500, detail="GitHub API returned no token") - - return token - - -def _build_worker_job( - job_name: str, - cfg: ProviderConfig, - provider: str, - env_vars: dict[str, str], - github_token_secret_name: str, -) -> client.V1Job: - """Build a K8s Job object for a worker pod.""" - env_list = [ - client.V1EnvVar(name="AI_PROVIDER", value=cfg.ai_provider), - client.V1EnvVar( - name="GITHUB_TOKEN", - value_from=client.V1EnvVarSource( - 
secret_key_ref=client.V1SecretKeySelector(name=github_token_secret_name, key="GITHUB_TOKEN") - ), - ), - ] - for k, v in env_vars.items(): - env_list.append(client.V1EnvVar(name=k, value=v)) - - # Provider-specific extra env vars (plain values) - for env_key, env_val in cfg.extra_env: - env_list.append(client.V1EnvVar(name=env_key, value=env_val)) - - # Provider-specific API key - api_secret = cfg.api_secret - env_list.append( - client.V1EnvVar( - name=api_secret.env_name, - value_from=client.V1EnvVarSource( - secret_key_ref=client.V1SecretKeySelector(name=api_secret.secret_name, key=api_secret.secret_key) - ), - ) - ) - - # Provider-specific extra secrets (e.g. model selection) - for ref in cfg.extra_secrets: - env_list.append( - client.V1EnvVar( - name=ref.env_name, - value_from=client.V1EnvVarSource( - secret_key_ref=client.V1SecretKeySelector(name=ref.secret_name, key=ref.secret_key) - ), - ) - ) - - container = client.V1Container( - name="worker", image=cfg.image, image_pull_policy="Never", - env=env_list, - ) - template = client.V1PodTemplateSpec( - metadata=client.V1ObjectMeta(labels={"job-name": job_name, "provider": provider}), - spec=client.V1PodSpec(restart_policy="Never", containers=[container]), - ) - job_spec = client.V1JobSpec(template=template, backoff_limit=0, ttl_seconds_after_finished=JOB_TTL_SECONDS) - return client.V1Job(metadata=client.V1ObjectMeta(name=job_name, namespace=NAMESPACE), spec=job_spec) - - -def _create_or_replace_secret(core: client.CoreV1Api, name: str, string_data: dict[str, str]) -> None: - body = client.V1Secret( - metadata=client.V1ObjectMeta(name=name, namespace=NAMESPACE), - type="Opaque", - string_data=string_data, - ) - try: - core.create_namespaced_secret(namespace=NAMESPACE, body=body) - except ApiException as e: - if e.status == 409: - core.patch_namespaced_secret(name=name, namespace=NAMESPACE, body=body) - else: - raise - - -def _delete_secret_if_exists(core: client.CoreV1Api, name: str) -> None: - try: - 
core.delete_namespaced_secret(name=name, namespace=NAMESPACE) - except ApiException as e: - if e.status != 404: - raise - - -def _attach_job_owner_to_secret(core: client.CoreV1Api, secret_name: str, job_name: str, job_uid: str) -> None: - owner_ref = client.V1OwnerReference( - api_version="batch/v1", - kind="Job", - name=job_name, - uid=job_uid, - controller=False, - block_owner_deletion=False, - ) - patch_body = {"metadata": {"ownerReferences": [owner_ref.to_dict()]}} - core.patch_namespaced_secret(name=secret_name, namespace=NAMESPACE, body=patch_body) - -class SecretPayload(BaseModel): - github_app_id: str = Field(..., alias="GITHUB_APP_ID") - github_private_key_pem: t.Optional[str] = Field(None, alias="GITHUB_PRIVATE_KEY_PEM") - github_private_key_path: t.Optional[str] = Field(None, alias="GITHUB_PRIVATE_KEY_PATH") - replace: bool = Field(True, description="If true, apply/replace the secret (default true)") - -class JobSpec(BaseModel): - name: str - image: t.Optional[str] = CLAUDE_WORKER_IMAGE - command: t.Optional[t.List[str]] = None - env: t.Optional[t.Dict[str, str]] = None - backoff_limit: int = 0 - -@app.get("/healthz") -def healthz(): - return {"ok": True} - -@app.post("/secrets/github-app", status_code=201) -def create_or_update_github_app_secret(payload: SecretPayload = Body(...), _auth: bool = Depends(verify_admin_token)): - batch, core = load_k8s_client() - name = "github-app" - data = {} - - data["GITHUB_APP_ID"] = payload.github_app_id.encode("utf-8") - pem_bytes: bytes | None = None - if payload.github_private_key_pem: - pem_bytes = payload.github_private_key_pem.encode("utf-8") - elif payload.github_private_key_path: - path = payload.github_private_key_path - if not os.path.isfile(path): - raise HTTPException(status_code=400, detail=f"private key not found at path: {path}") - with open(path, "rb") as fh: - pem_bytes = fh.read() - else: - raise HTTPException(status_code=400, detail="either GITHUB_PRIVATE_KEY_PEM or GITHUB_PRIVATE_KEY_PATH is 
required") - - data_b64 = {k: base64.b64encode(v).decode("utf-8") for k, v in {"GITHUB_APP_ID": data["GITHUB_APP_ID"], "GITHUB_PRIVATE_KEY": pem_bytes}.items()} - - secret = client.V1Secret( - metadata=client.V1ObjectMeta(name=name, namespace=NAMESPACE), - type="Opaque", - data=data_b64, - ) - - try: - core.patch_namespaced_secret(name=name, namespace=NAMESPACE, body=secret) - action = "patched" - except ApiException as e: - if e.status == 404: - core.create_namespaced_secret(namespace=NAMESPACE, body=secret) - action = "created" - else: - logger.exception("k8s error when creating/patching secret: status=%s reason=%s", getattr(e,'status',None), getattr(e,'reason',None)) - raise HTTPException(status_code=500, detail=f"k8s error: {e.reason} ({getattr(e, 'status', '')})") - return {"result": action, "secret": name, "namespace": NAMESPACE, "updated_at": datetime.now(timezone.utc).isoformat()} - -@app.post("/webhook/github") -async def github_webhook(request: Request): - body = await request.body() - - sig256 = request.headers.get("X-Hub-Signature-256") - if not verify_github_signature(WEBHOOK_SECRET, body, sig256): - logger.warning("Invalid webhook signature") - raise HTTPException(status_code=401, detail="invalid webhook signature") - - event = request.headers.get("X-GitHub-Event", "") - if event != "issues": - logger.info("Ignored event=%s", event) - return {"ok": True, "ignored": True, "reason": f"event={event}"} - - payload = await request.json() - action = payload.get("action") - - logger.info("Webhook received: event=issues action=%s repo=%s", action, ((payload.get("repository") or {}).get("full_name"))) - - logger.debug("TRIGGER_PREFIX: %s", TRIGGER_PREFIX) - logger.debug("PAYLOAD LABEL: %s", payload.get("label")) - - # Decide whether we trigger — match any label starting with TRIGGER_PREFIX - # e.g. 
"ai-pr-claude", "ai-pr-openai" - provider = None - - if action == "opened": - provider = issue_find_provider(payload, TRIGGER_PREFIX) - elif action == "labeled": - label = payload.get("label") or {} - label_name = label.get("name") if isinstance(label, dict) else str(label) - provider = _extract_provider_from_label(label_name, TRIGGER_PREFIX) - # else: action not handled - - if not provider: - reason = f"no label matching {TRIGGER_PREFIX}* for action={action}" - logger.info("Not triggering: %s", reason) - return {"ok": True, "ignored": True, "reason": reason} - - # Validate provider before any logging or further use - cfg = PROVIDER_CONFIG.get(provider) - if not cfg: - raise HTTPException(status_code=400, detail=f"unknown provider: {provider[:20]}") - - logger.info("Triggered with provider=%s", provider) - - repo_full = ((payload.get("repository") or {}).get("full_name")) - issue = payload.get("issue") or {} - issue_number = issue.get("number") - issue_title = issue.get("title", "")[:200] - issue_url = issue.get("html_url", "") - installation_id = ((payload.get("installation") or {}).get("id")) - - if not repo_full or not issue_number: - logger.error("Missing repo_full or issue_number") - raise HTTPException(status_code=400, detail="missing repository.full_name or issue.number") - - if not installation_id: - logger.error("Missing installation.id in webhook payload") - raise HTTPException(status_code=400, detail="missing installation.id (is the GitHub App installed?)") - - job_name = safe_name(f"ai-pr-{repo_full.replace('/', '-')}-{issue_number}-{provider}") - - batch, core = load_k8s_client() - - github_token = await _generate_installation_token(str(installation_id)) - token_secret_name = safe_name(f"{job_name}-gh-token") - _create_or_replace_secret(core, token_secret_name, {"GITHUB_TOKEN": github_token}) - - job = _build_worker_job( - job_name=job_name, - cfg=cfg, - provider=provider, - env_vars={ - "GITHUB_REPO": repo_full, - "GITHUB_ISSUE_NUMBER": 
str(issue_number), - "GITHUB_EVENT_ACTION": str(action), - "GITHUB_ISSUE_TITLE": issue_title, - "GITHUB_ISSUE_URL": issue_url, - "GITHUB_INSTALLATION_ID": str(installation_id), - }, - github_token_secret_name=token_secret_name, - ) - - # --- Logging around create_namespaced_job --- - logger.info("Creating Job: name=%s namespace=%s image=%s", job_name, NAMESPACE, cfg.image) - # logger.debug("Job body: %s", job) # inutile en prod (peut contenir secrets) - - try: - created_obj = batch.create_namespaced_job(namespace=NAMESPACE, body=job) - created_name = getattr(created_obj.metadata, "name", None) - created_uid = getattr(created_obj.metadata, "uid", None) - logger.info("K8s Job created: name=%s uid=%s", created_name, created_uid) - if created_name and created_uid: - try: - _attach_job_owner_to_secret(core, token_secret_name, created_name, created_uid) - except Exception as ex: - logger.warning("Unable to attach ownerReference on secret %s: %s", token_secret_name, ex) - created = True - except ApiException as e: - # show useful details for debugging - logger.exception("ApiException creating job: status=%s reason=%s ", getattr(e, "status", None), getattr(e, "reason", None)) - if getattr(e, "status", None) == 409: - _delete_secret_if_exists(core, token_secret_name) - logger.info("Job already exists (idempotent): %s", job_name) - created = False - else: - _delete_secret_if_exists(core, token_secret_name) - raise HTTPException(status_code=500, detail=f"k8s error creating job: {getattr(e,'reason',None)}") - except Exception as ex: - _delete_secret_if_exists(core, token_secret_name) - logger.exception("Unexpected error creating job: %s", ex) - raise HTTPException(status_code=500, detail="internal error") - - return { - "ok": True, - "triggered": True, - "created": created, - "job": job_name, - "namespace": NAMESPACE, - "repo": repo_full, - "issue_number": issue_number, - "action": action, - "provider": provider, - "timestamp": datetime.now(timezone.utc).isoformat(), - } - 
-@app.post("/jobs/run") -def run_job(_auth: bool = Depends(verify_admin_token)): - job_id = str(uuid.uuid4())[:8] - job_name = f"manual-{job_id}" - - container = client.V1Container( - name="worker", - image=CLAUDE_WORKER_IMAGE, - image_pull_policy="Never", - ) - - template = client.V1PodTemplateSpec( - metadata=client.V1ObjectMeta(labels={"job-name": job_name}), - spec=client.V1PodSpec( - restart_policy="Never", - containers=[container], - ), - ) - - job_spec = client.V1JobSpec(template=template, backoff_limit=0) - - job = client.V1Job( - api_version="batch/v1", - kind="Job", - metadata=client.V1ObjectMeta(name=job_name, namespace=NAMESPACE), - spec=job_spec, - ) - - batch, _ = load_k8s_client() - logger.info("Manual run: creating job %s in %s", job_name, NAMESPACE) - try: - batch.create_namespaced_job(namespace=NAMESPACE, body=job) - except ApiException as e: - logger.exception("ApiException creating manual job: status=%s reason=%s", getattr(e, "status", None), getattr(e, "reason", None)) - raise HTTPException(status_code=500, detail=f"k8s error creating job: {getattr(e,'reason',None)}") - - return {"status": "started", "job_name": job_name} - -def verify_github_signature(secret: str, body: bytes, signature_header: str | None) -> bool: - if not secret: - logger.error("WEBHOOK_SECRET is not configured — rejecting all webhooks") - return False - if not signature_header: - return False - if not signature_header.startswith("sha256="): - return False - - their_sig = signature_header.split("=", 1)[1].strip() - mac = hmac.new(secret.encode("utf-8"), msg=body, digestmod=hashlib.sha256) - our_sig = mac.hexdigest() - return hmac.compare_digest(our_sig, their_sig) - -def _extract_provider_from_label(label_name: str | None, prefix: str) -> str | None: - """Return validated provider suffix if label matches prefix, else None.""" - if not label_name or not label_name.startswith(prefix) or len(label_name) <= len(prefix): - return None - suffix = label_name[len(prefix):] - if 
suffix in PROVIDER_CONFIG: - return suffix - return None - - -def issue_find_provider(payload: dict, prefix: str) -> str | None: - """Find the first label matching prefix and return the provider suffix, or None.""" - labels = (payload.get("issue") or {}).get("labels") or [] - for lb in labels: - name = lb.get("name") if isinstance(lb, dict) else str(lb) - result = _extract_provider_from_label(name, prefix) - if result: - return result - return None - -if __name__ == "__main__": - import uvicorn - uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", "8000")), log_level="info") +#!/usr/bin/env python3 +""" +app.py - FastAPI service to manage a GitHub App secret and create Kubernetes Jobs. +... +(la même docstring) +""" + +from __future__ import annotations + +import base64 +import logging +import os +import re +import secrets +import uuid +from datetime import UTC, datetime + +from fastapi import Body, Depends, FastAPI, HTTPException, Request +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from pydantic import BaseModel, Field + +from app.config import PROVIDER_CONFIG, ProviderConfig, settings +from providers.source import ( + SourceProviderAPIError, + SourceProviderConfigurationError, + SourceProviderError, + get_provider, +) + +# --- Logging setup --- +# Use uvicorn's logger so messages aren't disabled by uvicorn's dictConfig +logger = logging.getLogger("uvicorn.error") + +if settings.enable_k8s_debug: + # debug du client k8s / urllib3 (affiche les requêtes HTTP vers l'API server) + logging.getLogger("kubernetes").setLevel(logging.DEBUG) + logging.getLogger("urllib3").setLevel(logging.DEBUG) + logger.warning( + "Kubernetes client debug ENABLED (ENABLE_K8S_DEBUG=true) - do NOT enable in production if logs leak secrets" + ) + +app = FastAPI(title="orchestrator", version="1.0") +bearer_scheme = HTTPBearer(auto_error=False) + + +def 
verify_admin_token(credentials: HTTPAuthorizationCredentials | None = Depends(bearer_scheme)): + """Require a valid bearer token for admin endpoints.""" + if not settings.admin_token: + raise HTTPException(status_code=503, detail="ADMIN_TOKEN not configured") + if not credentials or not secrets.compare_digest(credentials.credentials, settings.admin_token): + raise HTTPException(status_code=401, detail="invalid or missing bearer token") + return True + + +def load_k8s_client(): + """ + Load kube config: prefer in-cluster, fallback to KUBECONFIG if set, else raise. + Returns BatchV1Api and CoreV1Api clients. + """ + try: + config.load_incluster_config() + except Exception: + kubeconfig = os.getenv("KUBECONFIG") + if kubeconfig: + config.load_kube_config(config_file=kubeconfig) + else: + # last resort: try default kube config + config.load_kube_config() + return client.BatchV1Api(), client.CoreV1Api() + + +def safe_name(s: str) -> str: + s2 = re.sub(r"[^a-z0-9-]+", "-", s.lower()).strip("-") + return s2[:50] or "job" + + +def get_source_provider(): + return get_provider( + "github", + app_id=settings.github_app_id, + private_key=settings.github_private_key, + webhook_secret=settings.webhook_secret, + ) + + +def _build_worker_job( + job_name: str, + cfg: ProviderConfig, + provider: str, + env_vars: dict[str, str], + github_token_secret_name: str, +) -> client.V1Job: + """Build a K8s Job object for a worker pod.""" + env_list = [ + client.V1EnvVar(name="AI_PROVIDER", value=cfg.ai_provider), + client.V1EnvVar( + name="GITHUB_TOKEN", + value_from=client.V1EnvVarSource( + secret_key_ref=client.V1SecretKeySelector(name=github_token_secret_name, key="GITHUB_TOKEN") + ), + ), + ] + for k, v in env_vars.items(): + env_list.append(client.V1EnvVar(name=k, value=v)) + + # Provider-specific extra env vars (plain values) + for env_key, env_val in cfg.extra_env: + env_list.append(client.V1EnvVar(name=env_key, value=env_val)) + + # Provider-specific API key + api_secret = 
cfg.api_secret + env_list.append( + client.V1EnvVar( + name=api_secret.env_name, + value_from=client.V1EnvVarSource( + secret_key_ref=client.V1SecretKeySelector(name=api_secret.secret_name, key=api_secret.secret_key) + ), + ) + ) + + # Provider-specific extra secrets (e.g. model selection) + for ref in cfg.extra_secrets: + env_list.append( + client.V1EnvVar( + name=ref.env_name, + value_from=client.V1EnvVarSource( + secret_key_ref=client.V1SecretKeySelector(name=ref.secret_name, key=ref.secret_key) + ), + ) + ) + + container = client.V1Container( + name="worker", + image=cfg.image, + image_pull_policy="Never", + env=env_list, + ) + template = client.V1PodTemplateSpec( + metadata=client.V1ObjectMeta(labels={"job-name": job_name, "provider": provider}), + spec=client.V1PodSpec(restart_policy="Never", containers=[container]), + ) + job_spec = client.V1JobSpec(template=template, backoff_limit=0, ttl_seconds_after_finished=settings.job_ttl_seconds) + return client.V1Job(metadata=client.V1ObjectMeta(name=job_name, namespace=settings.namespace), spec=job_spec) + + +def _create_or_replace_secret(core: client.CoreV1Api, name: str, string_data: dict[str, str]) -> None: + body = client.V1Secret( + metadata=client.V1ObjectMeta(name=name, namespace=settings.namespace), + type="Opaque", + string_data=string_data, + ) + try: + core.create_namespaced_secret(namespace=settings.namespace, body=body) + except ApiException as e: + if e.status == 409: + core.patch_namespaced_secret(name=name, namespace=settings.namespace, body=body) + else: + raise + + +def _delete_secret_if_exists(core: client.CoreV1Api, name: str) -> None: + try: + core.delete_namespaced_secret(name=name, namespace=settings.namespace) + except ApiException as e: + if e.status != 404: + raise + + +def _attach_job_owner_to_secret(core: client.CoreV1Api, secret_name: str, job_name: str, job_uid: str) -> None: + owner_ref = client.V1OwnerReference( + api_version="batch/v1", + kind="Job", + name=job_name, + uid=job_uid, + 
controller=False, + block_owner_deletion=False, + ) + patch_body = {"metadata": {"ownerReferences": [owner_ref.to_dict()]}} + core.patch_namespaced_secret(name=secret_name, namespace=settings.namespace, body=patch_body) + + +class SecretPayload(BaseModel): + github_app_id: str = Field(..., alias="GITHUB_APP_ID") + github_private_key_pem: str | None = Field(None, alias="GITHUB_PRIVATE_KEY_PEM") + github_private_key_path: str | None = Field(None, alias="GITHUB_PRIVATE_KEY_PATH") + replace: bool = Field(True, description="If true, apply/replace the secret (default true)") + + +class JobSpec(BaseModel): + name: str + image: str | None = settings.claude_worker_image + command: list[str] | None = None + env: dict[str, str] | None = None + backoff_limit: int = 0 + + +@app.get("/healthz") +def healthz(): + return {"ok": True} + + +@app.post("/secrets/github-app", status_code=201) +def create_or_update_github_app_secret(payload: SecretPayload = Body(...), _auth: bool = Depends(verify_admin_token)): + batch, core = load_k8s_client() + name = "github-app" + data = {} + + data["GITHUB_APP_ID"] = payload.github_app_id.encode("utf-8") + pem_bytes: bytes | None = None + if payload.github_private_key_pem: + pem_bytes = payload.github_private_key_pem.encode("utf-8") + elif payload.github_private_key_path: + path = payload.github_private_key_path + if not os.path.isfile(path): + raise HTTPException(status_code=400, detail=f"private key not found at path: {path}") + with open(path, "rb") as fh: + pem_bytes = fh.read() + else: + raise HTTPException( + status_code=400, + detail="either GITHUB_PRIVATE_KEY_PEM or GITHUB_PRIVATE_KEY_PATH is required", + ) + + data_b64 = { + k: base64.b64encode(v).decode("utf-8") + for k, v in { + "GITHUB_APP_ID": data["GITHUB_APP_ID"], + "GITHUB_PRIVATE_KEY": pem_bytes, + }.items() + } + + secret = client.V1Secret( + metadata=client.V1ObjectMeta(name=name, namespace=settings.namespace), + type="Opaque", + data=data_b64, + ) + + try: + 
core.patch_namespaced_secret(name=name, namespace=settings.namespace, body=secret) + action = "patched" + except ApiException as e: + if e.status == 404: + core.create_namespaced_secret(namespace=settings.namespace, body=secret) + action = "created" + else: + logger.exception( + "k8s error when creating/patching secret: status=%s reason=%s", + getattr(e, "status", None), + getattr(e, "reason", None), + ) + raise HTTPException( + status_code=500, + detail=f"k8s error: {e.reason} ({getattr(e, 'status', '')})", + ) from e + return { + "result": action, + "secret": name, + "namespace": settings.namespace, + "updated_at": datetime.now(UTC).isoformat(), + } + + +@app.post("/webhook/github") +async def github_webhook(request: Request): + body = await request.body() + headers = dict(request.headers) + source_provider = get_source_provider() + + if not await source_provider.verify_webhook(headers, body): + logger.warning("Invalid webhook signature") + raise HTTPException(status_code=401, detail="invalid webhook signature") + + event = request.headers.get("X-GitHub-Event", "") + if event != "issues": + logger.info("Ignored event=%s", event) + return {"ok": True, "ignored": True, "reason": f"event={event}"} + + source_event = await source_provider.parse_event(headers, body) + if not source_event: + logger.info("Ignored unhandled GitHub event=%s", event) + return {"ok": True, "ignored": True, "reason": f"event={event}"} + + payload = source_event.raw + action = payload.get("action") + + logger.info( + "Webhook received: event=issues action=%s repo=%s", + action, + ((payload.get("repository") or {}).get("full_name")), + ) + + logger.debug("TRIGGER_PREFIX: %s", settings.trigger_prefix) + logger.debug("PAYLOAD LABEL: %s", payload.get("label")) + + # Decide whether we trigger — match any label starting with TRIGGER_PREFIX + # e.g. 
"ai-pr-claude", "ai-pr-openai" + provider = None + + if action == "opened": + provider = issue_find_provider(payload, settings.trigger_prefix) + elif action == "labeled": + label = payload.get("label") or {} + label_name = label.get("name") if isinstance(label, dict) else str(label) + provider = _extract_provider_from_label(label_name, settings.trigger_prefix) + # else: action not handled + + if not provider: + reason = f"no label matching {settings.trigger_prefix}* for action={action}" + logger.info("Not triggering: %s", reason) + return {"ok": True, "ignored": True, "reason": reason} + + # Validate provider before any logging or further use + cfg = PROVIDER_CONFIG.get(provider) + if not cfg: + raise HTTPException(status_code=400, detail=f"unknown provider: {provider[:20]}") + + logger.info("Triggered with provider=%s", provider) + + repo_full = (payload.get("repository") or {}).get("full_name") + issue = payload.get("issue") or {} + issue_number = issue.get("number") + issue_title = issue.get("title", "")[:200] + issue_url = issue.get("html_url", "") + installation_id = (payload.get("installation") or {}).get("id") + + if not repo_full or not issue_number: + logger.error("Missing repo_full or issue_number") + raise HTTPException(status_code=400, detail="missing repository.full_name or issue.number") + + if not installation_id: + logger.error("Missing installation.id in webhook payload") + raise HTTPException(status_code=400, detail="missing installation.id (is the GitHub App installed?)") + + job_name = safe_name(f"ai-pr-{repo_full.replace('/', '-')}-{issue_number}-{provider}") + + batch, core = load_k8s_client() + + try: + _, github_token = await source_provider.get_clone_credentials(repo_full) + except SourceProviderConfigurationError as ex: + logger.exception("Source provider configuration error: %s", ex) + raise HTTPException(status_code=500, detail="source provider is not configured") from ex + except SourceProviderAPIError as ex: + logger.exception("Source 
provider API error: %s", ex) + raise HTTPException(status_code=502, detail="source provider API request failed") from ex + except SourceProviderError as ex: + logger.exception("Source provider error: %s", ex) + raise HTTPException(status_code=500, detail="source provider error") from ex + token_secret_name = safe_name(f"{job_name}-gh-token") + _create_or_replace_secret(core, token_secret_name, {"GITHUB_TOKEN": github_token}) + + job = _build_worker_job( + job_name=job_name, + cfg=cfg, + provider=provider, + env_vars={ + "GITHUB_REPO": repo_full, + "GITHUB_ISSUE_NUMBER": str(issue_number), + "GITHUB_EVENT_ACTION": str(action), + "GITHUB_ISSUE_TITLE": issue_title, + "GITHUB_ISSUE_URL": issue_url, + "GITHUB_INSTALLATION_ID": str(installation_id), + }, + github_token_secret_name=token_secret_name, + ) + + # --- Logging around create_namespaced_job --- + logger.info("Creating Job: name=%s namespace=%s image=%s", job_name, settings.namespace, cfg.image) + # logger.debug("Job body: %s", job) # not useful in prod (may contain secrets) + + try: + created_obj = batch.create_namespaced_job(namespace=settings.namespace, body=job) + created_name = getattr(created_obj.metadata, "name", None) + created_uid = getattr(created_obj.metadata, "uid", None) + logger.info("K8s Job created: name=%s uid=%s", created_name, created_uid) + if created_name and created_uid: + try: + _attach_job_owner_to_secret(core, token_secret_name, created_name, created_uid) + except Exception as ex: + logger.warning("Unable to attach ownerReference on secret %s: %s", token_secret_name, ex) + created = True + except ApiException as e: + # show useful details for debugging + logger.exception( + "ApiException creating job: status=%s reason=%s ", + getattr(e, "status", None), + getattr(e, "reason", None), + ) + if getattr(e, "status", None) == 409: + _delete_secret_if_exists(core, token_secret_name) + logger.info("Job already exists (idempotent): %s", job_name) + created = False + else: + 
_delete_secret_if_exists(core, token_secret_name) + raise HTTPException( + status_code=500, + detail=f"k8s error creating job: {getattr(e, 'reason', None)}", + ) from e + except Exception as ex: + _delete_secret_if_exists(core, token_secret_name) + logger.exception("Unexpected error creating job: %s", ex) + raise HTTPException(status_code=500, detail="internal error") from ex + + return { + "ok": True, + "triggered": True, + "created": created, + "job": job_name, + "namespace": settings.namespace, + "repo": repo_full, + "issue_number": issue_number, + "action": action, + "provider": provider, + "timestamp": datetime.now(UTC).isoformat(), + } + + +@app.post("/jobs/run") +def run_job(_auth: bool = Depends(verify_admin_token)): + job_id = str(uuid.uuid4())[:8] + job_name = f"manual-{job_id}" + + container = client.V1Container( + name="worker", + image=settings.claude_worker_image, + image_pull_policy="Never", + ) + + template = client.V1PodTemplateSpec( + metadata=client.V1ObjectMeta(labels={"job-name": job_name}), + spec=client.V1PodSpec( + restart_policy="Never", + containers=[container], + ), + ) + + job_spec = client.V1JobSpec(template=template, backoff_limit=0) + + job = client.V1Job( + api_version="batch/v1", + kind="Job", + metadata=client.V1ObjectMeta(name=job_name, namespace=settings.namespace), + spec=job_spec, + ) + + batch, _ = load_k8s_client() + logger.info("Manual run: creating job %s in %s", job_name, settings.namespace) + try: + batch.create_namespaced_job(namespace=settings.namespace, body=job) + except ApiException as e: + logger.exception( + "ApiException creating manual job: status=%s reason=%s", + getattr(e, "status", None), + getattr(e, "reason", None), + ) + raise HTTPException( + status_code=500, + detail=f"k8s error creating job: {getattr(e, 'reason', None)}", + ) from e + + return {"status": "started", "job_name": job_name} + + +def _extract_provider_from_label(label_name: str | None, prefix: str) -> str | None: + """Return validated 
provider suffix if label matches prefix, else None.""" + if not label_name or not label_name.startswith(prefix) or len(label_name) <= len(prefix): + return None + suffix = label_name[len(prefix) :] + if suffix in PROVIDER_CONFIG: + return suffix + return None + + +def issue_find_provider(payload: dict, prefix: str) -> str | None: + """Find the first label matching prefix and return the provider suffix, or None.""" + labels = (payload.get("issue") or {}).get("labels") or [] + for lb in labels: + name = lb.get("name") if isinstance(lb, dict) else str(lb) + result = _extract_provider_from_label(name, prefix) + if result: + return result + return None + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", "8000")), log_level="info") diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..fd23948 --- /dev/null +++ b/app/config.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass + + +def _env_bool(name: str, default: str = "false") -> bool: + return os.getenv(name, default).lower() in ("1", "true", "yes") + + +@dataclass(frozen=True) +class Settings: + namespace: str = os.getenv("NAMESPACE", "ai-bot") + job_ttl_seconds: int = int(os.getenv("JOB_TTL_SECONDS", "3600")) + trigger_prefix: str = os.getenv("TRIGGER_PREFIX", "ai-pr-") + webhook_secret: str = os.getenv("WEBHOOK_SECRET", "") + admin_token: str = os.getenv("ADMIN_TOKEN", "") + enable_k8s_debug: bool = _env_bool("ENABLE_K8S_DEBUG") + + claude_worker_image: str = os.getenv("CLAUDE_WORKER_IMAGE", "worker-claude:latest") + codex_worker_image: str = os.getenv("CODEX_WORKER_IMAGE", "worker-codex:latest") + aider_worker_image: str = os.getenv("AIDER_WORKER_IMAGE", "worker-aider:latest") + + github_app_id: str = os.getenv("GITHUB_APP_ID", "") + github_private_key: str = os.getenv("GITHUB_PRIVATE_KEY", "") + + +@dataclass(frozen=True) +class ProviderSecretRef: + env_name: str + 
secret_name: str + secret_key: str + + +@dataclass(frozen=True) +class ProviderConfig: + image: str + ai_provider: str + api_secret: ProviderSecretRef + extra_env: tuple[tuple[str, str], ...] = () + extra_secrets: tuple[ProviderSecretRef, ...] = () + + +settings = Settings() + + +PROVIDER_CONFIG: dict[str, ProviderConfig] = { + "claude": ProviderConfig( + image=settings.claude_worker_image, + ai_provider="claude_code", + api_secret=ProviderSecretRef("ANTHROPIC_API_KEY", "anthropic-api-key", "ANTHROPIC_API_KEY"), + ), + "codex": ProviderConfig( + image=settings.codex_worker_image, + ai_provider="openai", + api_secret=ProviderSecretRef("OPENAI_API_KEY", "openai-api-key", "OPENAI_API_KEY"), + ), + "aider": ProviderConfig( + image=settings.aider_worker_image, + ai_provider="aider", + api_secret=ProviderSecretRef("OPENROUTER_API_KEY", "openrouter-api-key", "OPENROUTER_API_KEY"), + ), +} diff --git a/app/requirements-dev.txt b/app/requirements-dev.txt new file mode 100644 index 0000000..b420d48 --- /dev/null +++ b/app/requirements-dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt +pytest>=8.0,<9.0 +pytest-asyncio>=0.23,<1.0 +respx>=0.21,<1.0 +ruff>=0.8,<1.0 diff --git a/docs/adr/0001-source-provider-abstraction.md b/docs/adr/0001-source-provider-abstraction.md new file mode 100644 index 0000000..a31d62e --- /dev/null +++ b/docs/adr/0001-source-provider-abstraction.md @@ -0,0 +1,83 @@ +# ADR 0001: Source Provider Abstraction + +## Status + +Accepted + +## Context + +The orchestrator currently receives GitHub webhooks, verifies the GitHub +signature, reads GitHub issue payloads, generates GitHub App installation +tokens, and starts Kubernetes worker jobs. + +That works for the current proof of concept, but it couples the orchestration +flow to GitHub-specific details. Future support for GitLab, Gitea, Forgejo, or +other issue trackers would otherwise require GitHub conditionals in `app/app.py` +and make the core flow harder to test. 
+ +The project also has two provider concepts: + +- Source providers: issue tracking and code hosting, such as GitHub or GitLab. +- AI worker providers: Claude, Codex, Aider. + +This ADR covers source providers only. + +## Decision + +Introduce an async `SourceProvider` interface under `providers/source/`. + +The interface owns provider-specific behavior for: + +- Webhook verification and event parsing. +- Issue, comment, label, and pull request operations. +- Git clone credentials via `get_clone_credentials(repo)`. + +GitHub remains the only built-in source provider for now. The existing GitHub +behavior is extracted into `GitHubProvider`, and `app/app.py` keeps responsibility +for orchestration decisions and Kubernetes job creation. + +`get_clone_credentials(repo)` returns `(username, token)` for HTTPS Git auth. +Implementations should use the shortest-lived and narrowest-scoped credential +available for their platform. + +For GitHub, this means: + +- Username: `x-access-token` +- Token: GitHub App installation token + +## Non-Goals + +- No GitLab, Gitea, Forgejo, or Linear implementation in this change. +- No worker environment variable migration yet. +- No label state machine changes. +- No change to the existing `ai-pr-*` trigger behavior. +- No replacement of Kubernetes job orchestration. + +## Consequences + +Positive: + +- `app/app.py` no longer needs to know GitHub webhook signature or token minting + details. +- Source-provider behavior is unit-testable with mocked HTTP. +- Adding another source provider can happen behind the same contract. +- The distinction between source providers and AI worker providers is explicit. + +Tradeoffs: + +- The first abstraction is based on GitHub's current behavior, so future + providers may reveal contract gaps. +- Some source platforms cannot always provide short-lived repo-scoped clone + credentials. Their implementations must document the best available security + model. 
+- The workers still receive GitHub-shaped environment variables in this PR. + Generic `SOURCE_*` variables should be handled in a later migration. + +## Validation + +The GitHub implementation is covered by unit tests for: + +- Webhook signature verification. +- Issue label webhook parsing. +- Label add/delete/replace operations. +- Clone credential generation. diff --git a/images/orchestrator/Dockerfile b/images/orchestrator/Dockerfile index c85e482..0000ce3 100644 --- a/images/orchestrator/Dockerfile +++ b/images/orchestrator/Dockerfile @@ -20,10 +20,11 @@ RUN pip install --upgrade pip \ && pip install --no-cache-dir -r /srv/requirements.txt -# code -COPY app/ /srv/app/ - -EXPOSE 8080 +# code +COPY app/ /srv/app/ +COPY providers/ /srv/providers/ + +EXPOSE 8080 # app/app.py => module "app.app", variable FastAPI "app" -CMD ["sh", "-c", "uvicorn app.app:app --host 0.0.0.0 --port ${PORT} --proxy-headers"] \ No newline at end of file +CMD ["sh", "-c", "uvicorn app.app:app --host 0.0.0.0 --port ${PORT} --proxy-headers"] diff --git a/providers/source/__init__.py b/providers/source/__init__.py new file mode 100644 index 0000000..5b2c969 --- /dev/null +++ b/providers/source/__init__.py @@ -0,0 +1,17 @@ +from .base import SourceProvider, SourceProviderAPIError, SourceProviderConfigurationError, SourceProviderError +from .factory import get_provider, register_provider +from .models import Comment, Issue, PullRequest, WebhookEvent, WebhookEventType + +__all__ = [ + "Comment", + "Issue", + "PullRequest", + "SourceProviderAPIError", + "SourceProviderConfigurationError", + "SourceProviderError", + "SourceProvider", + "WebhookEvent", + "WebhookEventType", + "get_provider", + "register_provider", +] diff --git a/providers/source/base.py b/providers/source/base.py new file mode 100644 index 0000000..522925a --- /dev/null +++ b/providers/source/base.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod + +from .models import Comment, Issue, 
PullRequest, WebhookEvent + + +class SourceProviderError(Exception): + """Base exception for source-provider failures.""" + + +class SourceProviderConfigurationError(SourceProviderError): + """Raised when provider configuration is missing or invalid.""" + + +class SourceProviderAPIError(SourceProviderError): + """Raised when the remote source-provider API fails.""" + + +class SourceProvider(ABC): + """Source of truth for issues and PRs. + + A single provider handles both issue tracking and code hosting for now. + Splitting these concerns is left to implementations. + """ + + name: str + + @abstractmethod + async def verify_webhook(self, headers: dict, body: bytes) -> bool: + """Return whether the webhook request is authentic.""" + + @abstractmethod + async def parse_event(self, headers: dict, body: bytes) -> WebhookEvent | None: + """Parse provider-specific webhook payload into a generic event.""" + + @abstractmethod + async def get_issue(self, repo: str, issue_id: str) -> Issue: + """Return one issue by provider-specific issue identifier.""" + + @abstractmethod + async def list_issues( + self, + repo: str, + *, + labels: list[str] | None = None, + state: str = "open", + ) -> list[Issue]: + """Return issues matching the provided filters.""" + + @abstractmethod + async def list_issue_comments(self, repo: str, issue_id: str) -> list[Comment]: + """Return comments for an issue.""" + + @abstractmethod + async def add_issue_comment(self, repo: str, issue_id: str, body: str) -> Comment: + """Add a comment to an issue.""" + + @abstractmethod + async def get_issue_labels(self, repo: str, issue_id: str) -> list[str]: + """Return labels for an issue.""" + + @abstractmethod + async def add_issue_label(self, repo: str, issue_id: str, label: str) -> None: + """Add a label to an issue.""" + + @abstractmethod + async def delete_issue_label(self, repo: str, issue_id: str, label: str) -> None: + """Delete a label from an issue.""" + + @abstractmethod + async def 
replace_issue_labels(self, repo: str, issue_id: str, labels: list[str]) -> None: + """Replace all labels on an issue.""" + + @abstractmethod + async def create_pr( + self, + repo: str, + *, + branch: str, + base: str, + title: str, + body: str, + issue_id: str | None = None, + ) -> PullRequest: + """Create a pull request.""" + + @abstractmethod + async def get_pr(self, repo: str, pr_id: str) -> PullRequest: + """Return one pull request.""" + + @abstractmethod + async def get_clone_credentials(self, repo: str) -> tuple[str, str]: + """Return (username, token) usable for git HTTPS auth. + + Implementations should return the shortest-lived and narrowest-scoped + credential supported by the provider/deployment. + """ diff --git a/providers/source/factory.py b/providers/source/factory.py new file mode 100644 index 0000000..198dffc --- /dev/null +++ b/providers/source/factory.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from .base import SourceProvider +from .github import GitHubProvider + +_REGISTRY: dict[str, type[SourceProvider]] = { + "github": GitHubProvider, +} + + +def get_provider(name: str, **kwargs) -> SourceProvider: + if name not in _REGISTRY: + raise ValueError(f"unknown provider {name!r}, available: {list(_REGISTRY)}") + return _REGISTRY[name](**kwargs) + + +def register_provider(name: str, cls: type[SourceProvider]) -> None: + """Allow external providers to plug in without core changes.""" + _REGISTRY[name] = cls diff --git a/providers/source/github.py b/providers/source/github.py new file mode 100644 index 0000000..949729f --- /dev/null +++ b/providers/source/github.py @@ -0,0 +1,414 @@ +from __future__ import annotations + +import hashlib +import hmac +import json +import logging +import time +from datetime import datetime +from urllib.parse import quote + +import httpx +import jwt + +from .base import SourceProvider, SourceProviderAPIError, SourceProviderConfigurationError +from .models import Comment, Issue, PullRequest, WebhookEvent + 
+logger = logging.getLogger("uvicorn.error") + +DEFAULT_HTTP_TIMEOUT = httpx.Timeout(10.0, connect=5.0) +GITHUB_SIGNATURE_HEX_LENGTH = 64 +JWT_LEEWAY_SECONDS = 60 +JWT_TTL_SECONDS = 600 +DEFAULT_INSTALLATION_TOKEN_TTL_SECONDS = 3300 + + +class GitHubProvider(SourceProvider): + name = "github" + + def __init__( + self, + *, + app_id: str, + private_key: str, + webhook_secret: str, + api_base_url: str = "https://api.github.com", + http_client: httpx.AsyncClient | None = None, + ) -> None: + self.app_id = app_id + self.private_key = private_key + self.webhook_secret = webhook_secret + self.api_base_url = api_base_url.rstrip("/") + self._http_client = http_client + self._installation_ids_by_repo: dict[str, str] = {} + self._installation_tokens_by_repo: dict[str, tuple[str, float]] = {} + + async def verify_webhook(self, headers: dict, body: bytes) -> bool: + if not self.webhook_secret: + logger.error("WEBHOOK_SECRET is not configured - rejecting all webhooks") + return False + + signature_header = _header(headers, "X-Hub-Signature-256") + if not signature_header or not signature_header.startswith("sha256="): + return False + + their_sig = signature_header.split("=", 1)[1].strip() + if len(their_sig) != GITHUB_SIGNATURE_HEX_LENGTH or not _is_hex(their_sig): + return False + + mac = hmac.new(self.webhook_secret.encode("utf-8"), msg=body, digestmod=hashlib.sha256) + return hmac.compare_digest(mac.hexdigest(), their_sig) + + async def parse_event(self, headers: dict, body: bytes) -> WebhookEvent | None: + event_name = _header(headers, "X-GitHub-Event") + if not event_name: + return None + + try: + payload = json.loads(body.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError): + return None + if not isinstance(payload, dict): + return None + + repo = (payload.get("repository") or {}).get("full_name") + if repo: + installation_id = (payload.get("installation") or {}).get("id") + if installation_id: + self._installation_ids_by_repo[repo] = str(installation_id) 
+ + sender = payload.get("sender") or {} + actor = sender.get("login", "") + action = payload.get("action") + + if event_name == "issues": + issue = _issue_from_payload(payload) + if not issue: + return None + if action == "opened": + return WebhookEvent(type="issue_opened", actor=actor, repo=issue.repo, issue=issue, raw=payload) + if action == "labeled": + label = _label_name(payload.get("label")) + return WebhookEvent( + type="issue_labeled", + actor=actor, + repo=issue.repo, + issue=issue, + label=label, + raw=payload, + ) + if action == "unlabeled": + label = _label_name(payload.get("label")) + return WebhookEvent( + type="issue_unlabeled", + actor=actor, + repo=issue.repo, + issue=issue, + label=label, + raw=payload, + ) + return None + + if event_name == "issue_comment" and action == "created": + issue = _issue_from_payload(payload) + comment = _comment_from_payload(payload) + if not issue or not comment: + return None + return WebhookEvent( + type="issue_commented", + actor=actor, + repo=issue.repo, + issue=issue, + comment=comment, + raw=payload, + ) + + if event_name == "pull_request": + pr = _pr_from_payload(payload) + if not pr: + return None + if action == "opened": + return WebhookEvent(type="pr_opened", actor=actor, repo=pr.repo, pr=pr, raw=payload) + if action == "closed": + event_type = "pr_merged" if (payload.get("pull_request") or {}).get("merged") else "pr_closed" + return WebhookEvent(type=event_type, actor=actor, repo=pr.repo, pr=pr, raw=payload) + return None + + return None + + async def get_issue(self, repo: str, issue_id: str) -> Issue: + data = await self._request_json("GET", f"/repos/{repo}/issues/{issue_id}", repo=repo) + return _issue_from_api(repo, data) + + async def list_issues( + self, + repo: str, + *, + labels: list[str] | None = None, + state: str = "open", + ) -> list[Issue]: + params: dict[str, str] = {"state": state} + if labels: + params["labels"] = ",".join(labels) + data = await self._request_json("GET", 
f"/repos/{repo}/issues", repo=repo, params=params) + return [_issue_from_api(repo, item) for item in data] + + async def list_issue_comments(self, repo: str, issue_id: str) -> list[Comment]: + data = await self._request_json("GET", f"/repos/{repo}/issues/{issue_id}/comments", repo=repo) + return [_comment_from_api(str(issue_id), item) for item in data] + + async def add_issue_comment(self, repo: str, issue_id: str, body: str) -> Comment: + data = await self._request_json( + "POST", + f"/repos/{repo}/issues/{issue_id}/comments", + repo=repo, + json={"body": body}, + ) + return _comment_from_api(str(issue_id), data) + + async def get_issue_labels(self, repo: str, issue_id: str) -> list[str]: + data = await self._request_json("GET", f"/repos/{repo}/issues/{issue_id}/labels", repo=repo) + return [item["name"] for item in data if "name" in item] + + async def add_issue_label(self, repo: str, issue_id: str, label: str) -> None: + await self._request_json( + "POST", + f"/repos/{repo}/issues/{issue_id}/labels", + repo=repo, + json={"labels": [label]}, + ) + + async def delete_issue_label(self, repo: str, issue_id: str, label: str) -> None: + encoded_label = quote(label, safe="") + await self._request_json("DELETE", f"/repos/{repo}/issues/{issue_id}/labels/{encoded_label}", repo=repo) + + async def replace_issue_labels(self, repo: str, issue_id: str, labels: list[str]) -> None: + await self._request_json( + "PUT", + f"/repos/{repo}/issues/{issue_id}/labels", + repo=repo, + json={"labels": labels}, + ) + + async def create_pr( + self, + repo: str, + *, + branch: str, + base: str, + title: str, + body: str, + issue_id: str | None = None, + ) -> PullRequest: + data = await self._request_json( + "POST", + f"/repos/{repo}/pulls", + repo=repo, + json={"head": branch, "base": base, "title": title, "body": body}, + ) + pr = _pr_from_api(repo, data) + pr.issue_id = issue_id + return pr + + async def get_pr(self, repo: str, pr_id: str) -> PullRequest: + data = await 
self._request_json("GET", f"/repos/{repo}/pulls/{pr_id}", repo=repo) + return _pr_from_api(repo, data) + + async def get_clone_credentials(self, repo: str) -> tuple[str, str]: + token = await self._installation_token_for_repo(repo) + return "x-access-token", token + + async def _installation_token_for_repo(self, repo: str) -> str: + cached = self._installation_tokens_by_repo.get(repo) + if cached: + token, expires_at = cached + if time.time() < expires_at: + return token + + return await self._generate_installation_token(repo) + + async def _generate_installation_token(self, repo: str) -> str: + if not self.app_id or not self.private_key: + raise SourceProviderConfigurationError("GITHUB_APP_ID and GITHUB_PRIVATE_KEY must be configured") + + installation_id = await self._get_installation_id(repo) + encoded_jwt = self._build_app_jwt() + headers = self._github_headers(f"Bearer {encoded_jwt}") + response = await self._request( + "POST", + f"/app/installations/{installation_id}/access_tokens", + headers=headers, + ) + if response.status_code != 201: + raise SourceProviderAPIError(f"GitHub installation token exchange failed ({response.status_code})") + + payload = response.json() + token = payload.get("token") + if not token: + raise SourceProviderAPIError("GitHub API returned no token") + self._installation_tokens_by_repo[repo] = (token, _token_expires_at(payload)) + return token + + async def _get_installation_id(self, repo: str) -> str: + cached = self._installation_ids_by_repo.get(repo) + if cached: + return cached + + encoded_jwt = self._build_app_jwt() + response = await self._request( + "GET", + f"/repos/{repo}/installation", + headers=self._github_headers(f"Bearer {encoded_jwt}"), + ) + if response.status_code != 200: + raise SourceProviderAPIError(f"GitHub installation lookup failed ({response.status_code})") + + installation_id = response.json().get("id") + if not installation_id: + raise SourceProviderAPIError("GitHub API returned no installation id") + 
self._installation_ids_by_repo[repo] = str(installation_id) + return str(installation_id) + + def _build_app_jwt(self) -> str: + now = int(time.time()) + payload = { + "iat": now - JWT_LEEWAY_SECONDS, + "exp": now + JWT_TTL_SECONDS, + "iss": self.app_id, + } + return jwt.encode(payload, self.private_key, algorithm="RS256") + + async def _request_json(self, method: str, path: str, *, repo: str, **kwargs) -> dict | list: + token = await self._installation_token_for_repo(repo) + response = await self._request(method, path, headers=self._github_headers(f"Bearer {token}"), **kwargs) + if response.status_code >= 400: + raise SourceProviderAPIError(f"GitHub API request failed ({response.status_code})") + if response.status_code == 204: + return {} + return response.json() + + async def _request(self, method: str, path: str, **kwargs) -> httpx.Response: + url = f"{self.api_base_url}{path}" + kwargs.setdefault("timeout", DEFAULT_HTTP_TIMEOUT) + if self._http_client: + return await self._http_client.request(method, url, **kwargs) + async with httpx.AsyncClient() as client: + return await client.request(method, url, **kwargs) + + @staticmethod + def _github_headers(authorization: str) -> dict[str, str]: + return { + "Authorization": authorization, + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + + +def _header(headers: dict, name: str) -> str | None: + if name in headers: + return headers[name] + lowered = name.lower() + for key, value in headers.items(): + if key.lower() == lowered: + return value + return None + + +def _is_hex(value: str) -> bool: + try: + int(value, 16) + except ValueError: + return False + return True + + +def _token_expires_at(payload: dict) -> float: + expires_at = payload.get("expires_at") + if isinstance(expires_at, str): + try: + parsed = datetime.fromisoformat(expires_at.replace("Z", "+00:00")) + return parsed.timestamp() + except ValueError: + pass + return time.time() + DEFAULT_INSTALLATION_TOKEN_TTL_SECONDS 
+ + +def _label_name(label: object) -> str | None: + if isinstance(label, dict): + value = label.get("name") + return str(value) if value is not None else None + if label is not None: + return str(label) + return None + + +def _issue_from_payload(payload: dict) -> Issue | None: + repo = (payload.get("repository") or {}).get("full_name") + issue = payload.get("issue") or {} + if not repo or not issue: + return None + return _issue_from_api(repo, issue) + + +def _issue_from_api(repo: str, data: dict) -> Issue: + labels = [_label_name(label) for label in data.get("labels", [])] + user = data.get("user") or {} + return Issue( + id=str(data.get("id", "")), + number=int(data.get("number", 0)), + repo=repo, + title=data.get("title") or "", + body=data.get("body") or "", + labels=[label for label in labels if label], + author=user.get("login", ""), + url=data.get("html_url") or "", + state=data.get("state") or "open", + created_at=data.get("created_at"), + updated_at=data.get("updated_at"), + raw=data, + ) + + +def _comment_from_payload(payload: dict) -> Comment | None: + issue = payload.get("issue") or {} + comment = payload.get("comment") or {} + if not issue or not comment: + return None + return _comment_from_api(str(issue.get("id", issue.get("number", ""))), comment) + + +def _comment_from_api(issue_id: str, data: dict) -> Comment: + user = data.get("user") or {} + return Comment( + id=str(data.get("id", "")), + issue_id=issue_id, + body=data.get("body") or "", + author=user.get("login", ""), + created_at=data.get("created_at"), + ) + + +def _pr_from_payload(payload: dict) -> PullRequest | None: + repo = (payload.get("repository") or {}).get("full_name") + pr = payload.get("pull_request") or {} + if not repo or not pr: + return None + return _pr_from_api(repo, pr) + + +def _pr_from_api(repo: str, data: dict) -> PullRequest: + user_state = data.get("state") or "open" + state = "merged" if data.get("merged") or data.get("merged_at") else user_state + head = 
data.get("head") or {} + base = data.get("base") or {} + return PullRequest( + id=str(data.get("id", "")), + number=int(data.get("number", 0)), + repo=repo, + title=data.get("title") or "", + body=data.get("body") or "", + branch=head.get("ref", ""), + base=base.get("ref", ""), + state=state, + url=data.get("html_url") or "", + ) diff --git a/providers/source/models.py b/providers/source/models.py new file mode 100644 index 0000000..7037f92 --- /dev/null +++ b/providers/source/models.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, Field + + +class Issue(BaseModel): + id: str + number: int + repo: str + title: str + body: str + labels: list[str] + author: str + url: str + state: Literal["open", "closed"] + created_at: datetime + updated_at: datetime + raw: dict = Field(default_factory=dict) + + +class Comment(BaseModel): + id: str + issue_id: str + body: str + author: str + created_at: datetime + + +class PullRequest(BaseModel): + id: str + number: int + repo: str + title: str + body: str + branch: str + base: str + state: Literal["open", "closed", "merged"] + url: str + issue_id: str | None = None + + +WebhookEventType = Literal[ + "issue_opened", + "issue_labeled", + "issue_unlabeled", + "issue_commented", + "pr_opened", + "pr_merged", + "pr_closed", +] + + +class WebhookEvent(BaseModel): + type: WebhookEventType + actor: str + repo: str + issue: Issue | None = None + pr: PullRequest | None = None + comment: Comment | None = None + label: str | None = None + raw: dict = Field(default_factory=dict) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1e42ba3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.ruff] +line-length = 120 +target-version = "py312" + +[tool.ruff.lint] +select = ["E", "F", "I", "UP", "B"] +ignore = ["B008"] diff --git a/tests/app/test_webhook.py b/tests/app/test_webhook.py new file mode 100644 index 
0000000..0ab2a29 --- /dev/null +++ b/tests/app/test_webhook.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import json +from pathlib import Path +from types import SimpleNamespace + +import httpx +import pytest + +import app.app as orchestrator +from providers.source.models import WebhookEvent + +FIXTURES = Path(__file__).parents[1] / "providers" / "fixtures" + + +class FakeSourceProvider: + def __init__(self, *, verify: bool = True, event: WebhookEvent | None = None) -> None: + self.verify = verify + self.event = event + + async def verify_webhook(self, headers: dict, body: bytes) -> bool: + return self.verify + + async def parse_event(self, headers: dict, body: bytes) -> WebhookEvent | None: + return self.event + + async def get_clone_credentials(self, repo: str) -> tuple[str, str]: + return "x-access-token", "installation-token" + + +class FakeBatch: + def __init__(self) -> None: + self.created_jobs = [] + + def create_namespaced_job(self, *, namespace: str, body): + self.created_jobs.append((namespace, body)) + return SimpleNamespace(metadata=SimpleNamespace(name=body.metadata.name, uid="job-uid")) + + +class FakeCore: + def __init__(self) -> None: + self.secrets = [] + self.secret_patches = [] + + def create_namespaced_secret(self, *, namespace: str, body) -> None: + self.secrets.append((namespace, body)) + + def patch_namespaced_secret(self, *, name: str, namespace: str, body) -> None: + self.secret_patches.append((name, namespace, body)) + + +def load_payload(name: str) -> dict: + return json.loads((FIXTURES / name).read_text()) + + +async def post_webhook(body: bytes, headers: dict): + transport = httpx.ASGITransport(app=orchestrator.app) + async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: + return await client.post("/webhook/github", content=body, headers=headers) + + +@pytest.mark.asyncio +async def test_github_webhook_rejects_invalid_signature(monkeypatch): + monkeypatch.setattr(orchestrator, 
"get_source_provider", lambda: FakeSourceProvider(verify=False)) + + response = await post_webhook(b"{}", {"X-GitHub-Event": "issues"}) + + assert response.status_code == 401 + + +@pytest.mark.asyncio +async def test_github_webhook_ignores_non_issue_event(monkeypatch): + monkeypatch.setattr(orchestrator, "get_source_provider", lambda: FakeSourceProvider()) + + response = await post_webhook(b"{}", {"X-GitHub-Event": "pull_request"}) + + assert response.status_code == 200 + assert response.json() == {"ok": True, "ignored": True, "reason": "event=pull_request"} + + +@pytest.mark.asyncio +async def test_github_webhook_triggers_worker_job(monkeypatch): + payload = load_payload("issue_labeled.json") + event = WebhookEvent(type="issue_labeled", actor="bob", repo="acme/widgets", label="ai-pr-claude", raw=payload) + batch = FakeBatch() + core = FakeCore() + monkeypatch.setattr(orchestrator, "get_source_provider", lambda: FakeSourceProvider(event=event)) + monkeypatch.setattr(orchestrator, "load_k8s_client", lambda: (batch, core)) + + response = await post_webhook( + json.dumps(payload).encode("utf-8"), + {"X-GitHub-Event": "issues"}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["triggered"] is True + assert data["created"] is True + assert data["repo"] == "acme/widgets" + assert data["issue_number"] == 7 + assert data["provider"] == "claude" + assert len(core.secrets) == 1 + assert core.secrets[0][1].string_data == {"GITHUB_TOKEN": "installation-token"} + assert len(batch.created_jobs) == 1 diff --git a/tests/providers/fixtures/issue_labeled.json b/tests/providers/fixtures/issue_labeled.json new file mode 100644 index 0000000..2a67ca5 --- /dev/null +++ b/tests/providers/fixtures/issue_labeled.json @@ -0,0 +1,28 @@ +{ + "action": "labeled", + "issue": { + "id": 1001, + "number": 7, + "title": "Implement provider abstraction", + "body": "Move GitHub-specific code behind an interface.", + "labels": [ + {"name": "bug"}, + {"name": 
"ai-pr-claude"} + ], + "user": {"login": "alice"}, + "html_url": "https://github.com/acme/widgets/issues/7", + "state": "open", + "created_at": "2026-04-30T08:00:00Z", + "updated_at": "2026-04-30T09:00:00Z" + }, + "label": {"name": "ai-pr-claude"}, + "repository": { + "full_name": "acme/widgets" + }, + "installation": { + "id": 42 + }, + "sender": { + "login": "bob" + } +} diff --git a/tests/providers/fixtures/issue_opened.json b/tests/providers/fixtures/issue_opened.json new file mode 100644 index 0000000..1ac5eba --- /dev/null +++ b/tests/providers/fixtures/issue_opened.json @@ -0,0 +1,26 @@ +{ + "action": "opened", + "issue": { + "id": 1001, + "number": 7, + "title": "Implement provider abstraction", + "body": "Move GitHub-specific code behind an interface.", + "labels": [ + {"name": "ai-pr-claude"} + ], + "user": {"login": "alice"}, + "html_url": "https://github.com/acme/widgets/issues/7", + "state": "open", + "created_at": "2026-04-30T08:00:00Z", + "updated_at": "2026-04-30T09:00:00Z" + }, + "repository": { + "full_name": "acme/widgets" + }, + "installation": { + "id": 42 + }, + "sender": { + "login": "alice" + } +} diff --git a/tests/providers/fixtures/issue_unlabeled.json b/tests/providers/fixtures/issue_unlabeled.json new file mode 100644 index 0000000..c87fabd --- /dev/null +++ b/tests/providers/fixtures/issue_unlabeled.json @@ -0,0 +1,27 @@ +{ + "action": "unlabeled", + "issue": { + "id": 1001, + "number": 7, + "title": "Implement provider abstraction", + "body": "Move GitHub-specific code behind an interface.", + "labels": [ + {"name": "bug"} + ], + "user": {"login": "alice"}, + "html_url": "https://github.com/acme/widgets/issues/7", + "state": "open", + "created_at": "2026-04-30T08:00:00Z", + "updated_at": "2026-04-30T09:00:00Z" + }, + "label": {"name": "ai-pr-claude"}, + "repository": { + "full_name": "acme/widgets" + }, + "installation": { + "id": 42 + }, + "sender": { + "login": "bob" + } +} diff --git a/tests/providers/fixtures/pr_opened.json 
b/tests/providers/fixtures/pr_opened.json new file mode 100644 index 0000000..2f33fd3 --- /dev/null +++ b/tests/providers/fixtures/pr_opened.json @@ -0,0 +1,23 @@ +{ + "action": "opened", + "pull_request": { + "id": 2001, + "number": 12, + "title": "Add provider interface", + "body": "Extract GitHub provider.", + "head": {"ref": "feature/source-provider"}, + "base": {"ref": "main"}, + "state": "open", + "merged": false, + "html_url": "https://github.com/acme/widgets/pull/12" + }, + "repository": { + "full_name": "acme/widgets" + }, + "installation": { + "id": 42 + }, + "sender": { + "login": "alice" + } +} diff --git a/tests/providers/test_github.py b/tests/providers/test_github.py new file mode 100644 index 0000000..06abf8d --- /dev/null +++ b/tests/providers/test_github.py @@ -0,0 +1,204 @@ +from __future__ import annotations + +import hashlib +import hmac +from pathlib import Path + +import pytest +import respx +from httpx import Response + +from providers.source.github import GitHubProvider + +FIXTURES = Path(__file__).parent / "fixtures" +REPO = "acme/widgets" + + +def provider() -> GitHubProvider: + return GitHubProvider( + app_id="123", + private_key="test-private-key", + webhook_secret="webhook-secret", + ) + + +def load_fixture(name: str) -> bytes: + return (FIXTURES / name).read_bytes() + + +def signature(body: bytes, secret: str = "webhook-secret") -> str: + digest = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest() + return f"sha256={digest}" + + +@pytest.mark.asyncio +async def test_verify_webhook_valid_signature(): + body = load_fixture("issue_labeled.json") + headers = {"X-Hub-Signature-256": signature(body)} + + assert await provider().verify_webhook(headers, body) is True + + +@pytest.mark.asyncio +async def test_verify_webhook_invalid_signature(): + body = load_fixture("issue_labeled.json") + headers = {"X-Hub-Signature-256": "sha256=bad"} + + assert await provider().verify_webhook(headers, body) is False + + +@pytest.mark.asyncio 
+async def test_verify_webhook_rejects_non_hex_signature(): + body = load_fixture("issue_labeled.json") + headers = {"X-Hub-Signature-256": f"sha256={'z' * 64}"} + + assert await provider().verify_webhook(headers, body) is False + + +@pytest.mark.asyncio +async def test_parse_event_rejects_invalid_body(): + assert await provider().parse_event({"X-GitHub-Event": "issues"}, b"\xff") is None + assert await provider().parse_event({"X-GitHub-Event": "issues"}, b"[]") is None + + +@pytest.mark.asyncio +async def test_parse_event_issue_opened(): + body = load_fixture("issue_opened.json") + event = await provider().parse_event({"X-GitHub-Event": "issues"}, body) + + assert event is not None + assert event.type == "issue_opened" + assert event.actor == "alice" + assert event.repo == REPO + assert event.issue is not None + assert event.issue.number == 7 + + +@pytest.mark.asyncio +async def test_parse_event_issue_labeled(): + body = load_fixture("issue_labeled.json") + event = await provider().parse_event({"X-GitHub-Event": "issues"}, body) + + assert event is not None + assert event.type == "issue_labeled" + assert event.actor == "bob" + assert event.repo == REPO + assert event.label == "ai-pr-claude" + assert event.issue is not None + assert event.issue.number == 7 + assert event.issue.labels == ["bug", "ai-pr-claude"] + + +@pytest.mark.asyncio +async def test_parse_event_issue_unlabeled(): + body = load_fixture("issue_unlabeled.json") + event = await provider().parse_event({"X-GitHub-Event": "issues"}, body) + + assert event is not None + assert event.type == "issue_unlabeled" + assert event.label == "ai-pr-claude" + assert event.issue is not None + assert event.issue.labels == ["bug"] + + +@pytest.mark.asyncio +async def test_parse_event_pr_opened(): + body = load_fixture("pr_opened.json") + event = await provider().parse_event({"X-GitHub-Event": "pull_request"}, body) + + assert event is not None + assert event.type == "pr_opened" + assert event.actor == "alice" + assert 
event.repo == REPO + assert event.pr is not None + assert event.pr.number == 12 + assert event.pr.branch == "feature/source-provider" + assert event.pr.base == "main" + + +@pytest.mark.asyncio +@respx.mock +async def test_add_issue_label(monkeypatch): + gh = provider() + gh._installation_ids_by_repo[REPO] = "42" + monkeypatch.setattr(gh, "_build_app_jwt", lambda: "jwt") + respx.post("https://api.github.com/app/installations/42/access_tokens").mock( + return_value=Response(201, json={"token": "installation-token"}) + ) + route = respx.post(f"https://api.github.com/repos/{REPO}/issues/7/labels").mock( + return_value=Response(200, json=[{"name": "ai-pr-claude"}]) + ) + + await gh.add_issue_label(REPO, "7", "ai-pr-claude") + + assert route.called + assert route.calls.last.request.content == b'{"labels":["ai-pr-claude"]}' + + +@pytest.mark.asyncio +@respx.mock +async def test_delete_issue_label(monkeypatch): + gh = provider() + gh._installation_ids_by_repo[REPO] = "42" + monkeypatch.setattr(gh, "_build_app_jwt", lambda: "jwt") + respx.post("https://api.github.com/app/installations/42/access_tokens").mock( + return_value=Response(201, json={"token": "installation-token"}) + ) + route = respx.delete(f"https://api.github.com/repos/{REPO}/issues/7/labels/ai-pr-claude").mock( + return_value=Response(200, json=[]) + ) + + await gh.delete_issue_label(REPO, "7", "ai-pr-claude") + + assert route.called + + +@pytest.mark.asyncio +@respx.mock +async def test_replace_issue_labels_atomic(monkeypatch): + gh = provider() + gh._installation_ids_by_repo[REPO] = "42" + monkeypatch.setattr(gh, "_build_app_jwt", lambda: "jwt") + respx.post("https://api.github.com/app/installations/42/access_tokens").mock( + return_value=Response(201, json={"token": "installation-token"}) + ) + route = respx.put(f"https://api.github.com/repos/{REPO}/issues/7/labels").mock( + return_value=Response(200, json=[{"name": "phase-ready"}]) + ) + + await gh.replace_issue_labels(REPO, "7", ["phase-ready"]) + + 
assert route.called + assert route.calls.last.request.content == b'{"labels":["phase-ready"]}' + + +@pytest.mark.asyncio +@respx.mock +async def test_get_clone_credentials_returns_ephemeral_token(monkeypatch): + gh = provider() + monkeypatch.setattr(gh, "_build_app_jwt", lambda: "jwt") + respx.get(f"https://api.github.com/repos/{REPO}/installation").mock(return_value=Response(200, json={"id": 42})) + respx.post("https://api.github.com/app/installations/42/access_tokens").mock( + return_value=Response(201, json={"token": "installation-token"}) + ) + + username, token = await gh.get_clone_credentials(REPO) + + assert username == "x-access-token" + assert token == "installation-token" + + +@pytest.mark.asyncio +@respx.mock +async def test_installation_token_is_cached(monkeypatch): + gh = provider() + monkeypatch.setattr(gh, "_build_app_jwt", lambda: "jwt") + respx.get(f"https://api.github.com/repos/{REPO}/installation").mock(return_value=Response(200, json={"id": 42})) + token_route = respx.post("https://api.github.com/app/installations/42/access_tokens").mock( + return_value=Response(201, json={"token": "installation-token"}) + ) + + assert await gh._installation_token_for_repo(REPO) == "installation-token" + assert await gh._installation_token_for_repo(REPO) == "installation-token" + + assert token_route.call_count == 1