From a08aa6e873feb6a275561ea263f0a93a5403f1dd Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 5 May 2026 13:35:09 +0000 Subject: [PATCH] feat(demo-function): discover manifest from Playwright TS specs - Resolve sibling YAML: `foo.spec.docgen.yaml` and `foo.docgen.yaml` - Parse `type: docgen` annotations per `test()` block (JSON.stringify + strings) - Support `path.spec.ts::title` and `--grep` for disambiguation - Fix `demonstration.grep` for sidecars without `demonstration.spec` Closes #49 Co-authored-by: John Menke --- src/docgen/cli.py | 8 + src/docgen/demo_function.py | 375 ++++++++++++++++++++++++++++++++++-- tests/test_demo_function.py | 173 +++++++++++++++++ 3 files changed, 543 insertions(+), 13 deletions(-) diff --git a/src/docgen/cli.py b/src/docgen/cli.py index 974b828..b739bc6 100644 --- a/src/docgen/cli.py +++ b/src/docgen/cli.py @@ -205,12 +205,19 @@ def playwright( is_flag=True, help="Skip TTS even if OPENAI_API_KEY is set.", ) +@click.option( + "--grep", + "grep_arg", + default=None, + help="Playwright test title filter when --manifest is a .ts/.tsx spec (or overrides YAML).", +) @click.pass_context def demo_function( ctx: click.Context, manifest_arg: str, output_dir_arg: str | None, output_dir_legacy: str | None, + grep_arg: str | None, cache_dir_arg: str | None, no_narration: bool, ) -> None: @@ -224,6 +231,7 @@ def demo_function( code = run_cli( manifest_arg=manifest_arg, output_dir_arg=out, + grep=grep_arg, cache_dir_arg=cache_dir_arg, no_narration=no_narration, ) diff --git a/src/docgen/demo_function.py b/src/docgen/demo_function.py index 62964a3..14a02c2 100644 --- a/src/docgen/demo_function.py +++ b/src/docgen/demo_function.py @@ -65,6 +65,8 @@ CACHED_ARTIFACTS = ("rendered.mp4", "poster.png", "fragment.txt", "manifest.json") +_PLAYWRIGHT_SPEC_SUFFIXES = frozenset({".ts", ".tsx", ".mts", ".cts"}) + class ManifestError(ValueError): """Raised when a manifest is malformed or violates the documented schema.""" @@ -213,21 +215,44 @@ def _read_fixture_bytes(self, fixture: str) -> bytes | None: # --------------------------------------------------------------------------- -def load_manifest(spec: str | Path) -> Manifest: - """Load a manifest from either a YAML sidecar path or `path.py::test_name`. +def load_manifest( + spec: str | Path, + *, + grep: str | None = None, +) -> Manifest: + """Load a manifest from YAML, `path.py::test_name`, or a Playwright spec path. + + Playwright TypeScript (``.ts`` / ``.tsx`` / ``.mts`` / ``.cts``): + + - ``spec.ts::Test title`` — same as ``--manifest spec.ts --grep "Test title"``. + - ``spec.ts`` + ``--grep`` — selects the matching test for annotation discovery. + - ``spec.ts`` alone — tries ``spec.docgen.yaml`` sidecar, else parses + ``test.info().annotations`` with ``type: 'docgen'`` (must match exactly + one test unless ``grep`` is set). Raises `ManifestError` for invalid manifests, `FileNotFoundError` if the path does not exist. """ if isinstance(spec, Path): - return _load_yaml_sidecar(spec) + p = spec.resolve() + if not p.exists(): + raise FileNotFoundError(f"manifest not found: {p}") + if _is_playwright_spec_path(p): + return _load_playwright_ts_manifest(p, test_title=None, grep=grep) + return _load_yaml_sidecar(p) text = str(spec) if "::" in text: - path_part, _, test_name = text.partition("::") - py_path = Path(path_part) - if not py_path.exists(): - raise FileNotFoundError(f"manifest not found: {py_path}") - return _load_pytest_marker(py_path, test_name) + path_part, _, tail = text.partition("::") + path_obj = Path(path_part) + if not path_obj.exists(): + raise FileNotFoundError(f"manifest not found: {path_obj}") + if _is_playwright_spec_path(path_obj): + return _load_playwright_ts_manifest( + path_obj, + test_title=tail.strip(), + grep=None, + ) + return _load_pytest_marker(path_obj, tail) p = Path(text) if not p.exists(): raise FileNotFoundError(f"manifest not found: {p}") @@ -235,9 +260,331 @@ def load_manifest(spec: str | Path) -> Manifest: raise ManifestError( "Python manifest must use 'path.py::test_name' syntax to select a test" ) + if _is_playwright_spec_path(p): + return _load_playwright_ts_manifest(p, test_title=None, grep=grep) return _load_yaml_sidecar(p) +def _is_playwright_spec_path(path: Path) -> bool: + return path.suffix.lower() in _PLAYWRIGHT_SPEC_SUFFIXES + + +def _playwright_sidecar_paths(spec_path: Path) -> list[Path]: + """Candidate sibling manifests for ``foo.spec.ts`` (short + long stem).""" + spec_path = spec_path.resolve() + parent = spec_path.parent + stem = spec_path.stem # e.g. ``lesson.spec`` for ``lesson.spec.ts`` + names = [f"{stem}.docgen.yaml"] + if stem.endswith(".spec"): + names.append(f"{stem[:-len('.spec')]}.docgen.yaml") + # De-dupe while preserving order + seen: set[str] = set() + out: list[Path] = [] + for n in names: + if n not in seen: + seen.add(n) + out.append(parent / n) + return out + + +def _find_playwright_sidecar(spec_path: Path) -> Path | None: + for p in _playwright_sidecar_paths(spec_path): + if p.exists(): + return p + return None + + +def _load_playwright_ts_manifest( + spec_path: Path, + *, + test_title: str | None, + grep: str | None, +) -> Manifest: + """Resolve manifest for a Node ``@playwright/test`` spec file.""" + spec_path = spec_path.resolve() + effective_grep = (test_title or "").strip() if test_title else None + if effective_grep is None and grep: + effective_grep = grep.strip() or None + + sidecar = _find_playwright_sidecar(spec_path) + if sidecar is not None: + manifest = _load_yaml_sidecar(sidecar) + manifest.fn_source_path = spec_path + if manifest.kind == "playwright": + if manifest.pw_spec is None: + manifest.pw_spec = spec_path + if manifest.pw_grep is None and effective_grep: + manifest.pw_grep = effective_grep + if manifest.pw_spec and manifest.pw_grep is None: + raise ManifestError( + f"{sidecar.name}: demonstration.grep is required when using a " + f"TypeScript manifest entry (or pass --grep / path.ts::title)" + ) + return manifest + + raw = _parse_ts_docgen_contract(spec_path, grep=effective_grep) + return _coerce(raw, source_path=spec_path) + + +def _parse_ts_docgen_contract(spec_path: Path, *, grep: str | None) -> dict[str, Any]: + """Extract JSON contract from ``test.info().annotations`` docgen entries.""" + src = spec_path.read_text(encoding="utf-8") + bindings = _ts_docgen_annotation_bindings(src) + if not bindings: + raise ManifestError( + f"no docgen annotation found in {spec_path.name} " + f"(expected test.info().annotations with type 'docgen', or add " + f"{_playwright_sidecar_paths(spec_path)[-1].name})" + ) + + if grep: + exact = [b for b in bindings if b.test_title == grep] + if exact: + matches = exact + else: + matches = [b for b in bindings if grep in b.test_title] + if not matches: + raise ManifestError( + f"no test matched --grep {grep!r} in {spec_path.name} " + f"(available: {[b.test_title for b in bindings]})" + ) + if len(matches) > 1: + raise ManifestError( + f"ambiguous --grep {grep!r}: matched {[b.test_title for b in matches]}" + ) + chosen = matches[0] + else: + if len(bindings) > 1: + titles = [b.test_title for b in bindings] + raise ManifestError( + f"multiple docgen annotations in {spec_path.name} without --grep " + f"or path.ts::title (tests: {titles}). Add a sibling " + f"{_playwright_sidecar_paths(spec_path)[0].name} " + "or pass --grep." + ) + chosen = bindings[0] + + desc = chosen.description_json + if not isinstance(desc, str): + raise ManifestError("internal: docgen description must be str") + try: + if desc.lstrip().startswith("{"): + contract = _json_from_js_object_literal(desc) + else: + contract = json.loads(desc) + except json.JSONDecodeError as exc: + raise ManifestError( + f"docgen annotation JSON is invalid near line {chosen.line}: {exc}" + ) from exc + if not isinstance(contract, dict): + raise ManifestError("docgen annotation description must be a JSON object") + + demo = contract.get("demonstration") + if isinstance(demo, dict) and demo.get("kind") == "playwright": + # Declarative ``url`` + ``actions`` uses the in-process Python driver; only + # add ``spec``/``grep`` for Node ``npx playwright test`` when there is no ``url``. + if not demo.get("url"): + if not demo.get("spec"): + demo = dict(demo) + demo["spec"] = str(spec_path) + if not demo.get("grep"): + demo = dict(demo) + demo["grep"] = chosen.test_title + contract = dict(contract) + contract["demonstration"] = demo + return contract + + +@dataclass +class _TsDocgenBinding: + test_title: str + description_json: str + line: int + + +def _ts_docgen_annotation_bindings(src: str) -> list[_TsDocgenBinding]: + """Find ``type: 'docgen'`` payloads inside each ``test('title', ...)`` block.""" + out: list[_TsDocgenBinding] = [] + # Playwright tests usually start at beginning of line; avoid matching `latest`. + for m in re.finditer( + r"(?:^|\n)\s*test\s*(?:\.(?:only|skip|fixme))?\s*\(\s*(['\"])((?:\\.|(?!\1).)*)\1", + src, + re.MULTILINE, + ): + raw_title = m.group(2) + try: + test_title = bytes(raw_title, "utf-8").decode("unicode_escape") + except UnicodeDecodeError: + test_title = raw_title + line_no = src.count("\n", 0, m.start()) + 1 + start = m.end() + next_m = re.search( + r"(?:^|\n)\s*test\s*(?:\.(?:only|skip|fixme))?\s*\(\s*['\"]", + src[start + 1 :], + re.MULTILINE, + ) + block_end = start + 1 + next_m.start() if next_m else len(src) + block = src[start:block_end] + if not re.search(r"type\s*:\s*['\"]docgen['\"]", block, re.IGNORECASE): + continue + + desc_json = _ts_extract_docgen_description_json(block) + if desc_json is None: + continue + out.append( + _TsDocgenBinding( + test_title=test_title, + description_json=desc_json, + line=line_no, + ) + ) + return out + + +def _ts_extract_docgen_description_json(block: str) -> str | None: + """Return JSON text from ``description:`` after ``type: 'docgen'`` in *block*.""" + idx = 0 + while True: + m_type = re.search( + r"type\s*:\s*['\"]docgen['\"]\s*,\s*description\s*:\s*", + block[idx:], + re.IGNORECASE, + ) + if not m_type: + return None + pos = idx + m_type.end() + rest = block[pos:].lstrip() + + low = rest[:20].lower() + if low.startswith("json.stringify"): + open_paren = rest.find("(") + if open_paren == -1: + idx = pos + continue + j = open_paren + 1 + while j < len(rest) and rest[j] in " \t\n\r": + j += 1 + if j >= len(rest) or rest[j] != "{": + idx = pos + continue + end_obj = _ts_find_matching_brace(rest, j) + if end_obj == -1: + idx = pos + continue + return _strip_js_comments(rest[j : end_obj + 1]) + + q = rest[0] if rest else "" + if q in "'\"": + end = 1 + escaped = False + while end < len(rest): + ch = rest[end] + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == q: + raw = rest[1:end] + try: + return bytes(raw, "utf-8").decode("unicode_escape") + except UnicodeDecodeError: + return raw + end += 1 + return None + idx = pos + + +def _ts_find_matching_brace(s: str, open_idx: int) -> int: + """Return index of ``}`` matching ``{`` at *open_idx*, or -1.""" + if open_idx >= len(s) or s[open_idx] != "{": + return -1 + depth = 0 + i = open_idx + in_str: str | None = None + escaped = False + while i < len(s): + ch = s[i] + if in_str: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == in_str: + in_str = None + i += 1 + continue + if ch in "'\"`": + in_str = ch + i += 1 + continue + if ch == "{": + depth += 1 + elif ch == "}": + depth -= 1 + if depth == 0: + return i + i += 1 + return -1 + + +def _strip_js_comments(fragment: str) -> str: + """Remove // and /* */ comments from a JS object literal fragment (best-effort).""" + out: list[str] = [] + k = 0 + n = len(fragment) + in_str: str | None = None + while k < n: + ch = fragment[k] + if in_str: + if ch == "\\" and k + 1 < n: + out.append(ch) + out.append(fragment[k + 1]) + k += 2 + continue + out.append(ch) + if ch == in_str: + in_str = None + k += 1 + continue + if ch in "'\"": + in_str = ch + out.append(ch) + k += 1 + continue + if ch == "/" and k + 1 < n: + nxt = fragment[k + 1] + if nxt == "/": + k += 2 + while k < n and fragment[k] not in "\n\r": + k += 1 + continue + if nxt == "*": + k += 2 + while k + 1 < n and not (fragment[k] == "*" and fragment[k + 1] == "/"): + k += 1 + k = min(k + 2, n) + continue + out.append(ch) + k += 1 + return "".join(out) + + +def _json_from_js_object_literal(fragment: str) -> dict[str, Any]: + """Parse a JS-style object literal (possibly with trailing commas) as JSON.""" + cleaned = _strip_trailing_commas_json(_strip_js_comments(fragment)) + return json.loads(cleaned) + + +def _strip_trailing_commas_json(s: str) -> str: + """Remove trailing commas before ``}`` and ``]`` (common in TS/JS).""" + prev = None + while prev != s: + prev = s + s = re.sub(r",(\s*})", r"\1", s) + s = re.sub(r",(\s*])", r"\1", s) + return s + + def _load_yaml_sidecar(path: Path) -> Manifest: raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {} if not isinstance(raw, dict): @@ -348,11 +695,12 @@ def _coerce(raw: dict[str, Any], *, source_path: Path | None = None) -> Manifest elif not pw_spec.is_absolute(): pw_spec = Path.cwd() / pw_spec pw_spec = pw_spec.resolve() - if grep_raw is None or not str(grep_raw).strip(): - raise ManifestError( - "demonstration.grep is required when demonstration.spec is set" - ) + if grep_raw is not None and str(grep_raw).strip(): pw_grep = str(grep_raw).strip() + if pw_spec is not None and not pw_grep: + raise ManifestError( + "demonstration.grep is required when demonstration.spec is set" + ) if pw_cwd_raw is not None: pw_cwd = Path(str(pw_cwd_raw).strip()) if not pw_cwd.is_absolute() and source_path is not None: @@ -1264,6 +1612,7 @@ def run_cli( manifest_arg: str, output_dir_arg: str, *, + grep: str | None = None, cache_dir_arg: str | None = None, no_narration: bool = False, stderr=None, @@ -1280,7 +1629,7 @@ def run_cli( stdout = sys.stdout try: - manifest = load_manifest(manifest_arg) + manifest = load_manifest(manifest_arg, grep=grep) except FileNotFoundError as exc: print(f"[demo-function] {exc}", file=stderr) return EXIT_INVALID diff --git a/tests/test_demo_function.py b/tests/test_demo_function.py index 6444e3e..ff13d29 100644 --- a/tests/test_demo_function.py +++ b/tests/test_demo_function.py @@ -561,3 +561,176 @@ def test_thing(): assert [act.kind for act in a.actions] == [act.kind for act in b.actions] assert a.resolution == b.resolution assert a.duration_seconds == b.duration_seconds + + +# --------------------------------------------------------------------------- +# Playwright TypeScript: sidecar + inline docgen annotation +# --------------------------------------------------------------------------- + + +def test_ts_sidecar_loads_and_sets_spec_grep(tmp_path: Path) -> None: + spec = tmp_path / "lesson.spec.ts" + spec.write_text("// placeholder\n", encoding="utf-8") + side = tmp_path / "lesson.docgen.yaml" + side.write_text( + _yaml_manifest_text( + demonstration={ + "kind": "playwright", + "grep": "compiles", + }, + ), + encoding="utf-8", + ) + m = load_manifest(spec) + assert m.pw_spec == spec.resolve() + assert m.pw_grep == "compiles" + assert m.fn_source_path == spec.resolve() + + +def test_ts_sidecar_alternate_name_lesson_docgen_yaml(tmp_path: Path) -> None: + """``lesson.spec.ts`` may pair with ``lesson.docgen.yaml`` (not only ``lesson.spec.docgen.yaml``).""" + spec = tmp_path / "lesson.spec.ts" + spec.write_text("//\n", encoding="utf-8") + alt = tmp_path / "lesson.docgen.yaml" + alt.write_text( + _yaml_manifest_text( + demonstration={ + "kind": "playwright", + "grep": "g", + }, + ), + encoding="utf-8", + ) + m = load_manifest(spec) + assert m.pw_grep == "g" + + +def test_ts_sidecar_requires_grep(tmp_path: Path) -> None: + spec = tmp_path / "x.spec.ts" + spec.write_text("//\n", encoding="utf-8") + side = tmp_path / "x.docgen.yaml" + side.write_text( + _yaml_manifest_text( + demonstration={"kind": "playwright", "url": "http://127.0.0.1/"}, + ), + encoding="utf-8", + ) + with pytest.raises(ManifestError, match=r"demonstration\.grep is required"): + load_manifest(spec) + + +def test_ts_inline_json_stringify_contract(tmp_path: Path) -> None: + spec = tmp_path / "api.spec.ts" + spec.write_text( + r''' +import { test, expect } from "@playwright/test"; + +test("does the thing", async ({ page }) => { + test.info().annotations.push({ + type: "docgen", + description: JSON.stringify({ + "identifier": "pkg/Foo.ts:bar", + "intent": "Runs the demo.", + "demonstration": { + "kind": "playwright", + "url": "http://127.0.0.1:3000/", + "actions": [{ "kind": "click", "selector": "#go" }], + }, + "output_budget": { "duration_seconds": 10, "resolution": "800x600" }, + }), + }); + await page.goto("http://127.0.0.1:3000/"); +}); +''', + encoding="utf-8", + ) + m = load_manifest(spec) + assert m.identifier == "pkg/Foo.ts:bar" + assert m.intent == "Runs the demo." + assert m.url == "http://127.0.0.1:3000/" + assert m.pw_spec is None + assert m.pw_grep is None + assert m.fn_source_path == spec.resolve() + assert m.resolution == "800x600" + + +def test_ts_path_with_title_same_as_grep(tmp_path: Path) -> None: + spec = tmp_path / "api.spec.ts" + spec.write_text( + r''' +import { test } from "@playwright/test"; + +test("Alpha case", async () => { + test.info().annotations.push({ + type: "docgen", + description: JSON.stringify({ + "identifier": "a:b", + "intent": "i", + "demonstration": { "kind": "playwright", "url": "http://x/", "actions": [] }, + }), + }); +}); + +test("Beta", async () => { + test.info().annotations.push({ + type: "docgen", + description: JSON.stringify({ + "identifier": "c:d", + "intent": "j", + "demonstration": { "kind": "playwright", "url": "http://y/", "actions": [] }, + }), + }); +}); +''', + encoding="utf-8", + ) + m = load_manifest(f"{spec}::Alpha case") + assert m.identifier == "a:b" + m2 = load_manifest(spec, grep="Alpha case") + assert m2.identifier == "a:b" + + +def test_ts_multiple_docgen_requires_grep(tmp_path: Path) -> None: + spec = tmp_path / "multi.spec.ts" + spec.write_text( + r''' +import { test } from "@playwright/test"; + +test("one", async () => { + test.info().annotations.push({ + type: "docgen", + description: JSON.stringify({ + "identifier": "a:b", + "intent": "i", + "demonstration": { "kind": "playwright", "url": "http://x/", "actions": [] }, + }), + }); +}); + +test("two", async () => { + test.info().annotations.push({ + type: "docgen", + description: JSON.stringify({ + "identifier": "c:d", + "intent": "j", + "demonstration": { "kind": "playwright", "url": "http://y/", "actions": [] }, + }), + }); +}); +''', + encoding="utf-8", + ) + with pytest.raises(ManifestError, match="multiple docgen"): + load_manifest(spec) + + +def test_run_cli_passes_grep_to_ts_manifest(tmp_path: Path) -> None: + spec = tmp_path / "z.spec.ts" + spec.write_text("//\n", encoding="utf-8") + from unittest.mock import patch + + with patch.object(df, "load_manifest") as lm: + lm.side_effect = ManifestError("stop early") + code = run_cli(str(spec), str(tmp_path / "out"), grep="pick me", no_narration=True) + assert code == df.EXIT_INVALID + lm.assert_called_once_with(str(spec), grep="pick me")