diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 42aaa4105..3a0b2211e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -7,9 +7,7 @@ on: - 'components/**' - 'docs/**' - 'bin/build-docs-bundle.sh' - - 'bin/build-reference.py' - - 'bin/_docs_components.py' - - 'bin/_load_catalog.py' + - 'bin/build-reference.php' - 'composer.json' - 'composer.lock' - '.github/workflows/docs.yml' @@ -46,7 +44,7 @@ jobs: rm -f docs/assets/php-toolkit.zip zip -qr docs/assets/php-toolkit.zip components vendor bootstrap.php composer.json \ -x "*/Tests/*" "*/tests/*" "*/.git/*" "*/.github/*" "*/node_modules/*" - python3 bin/build-reference.py + php bin/build-reference.php - uses: actions/upload-pages-artifact@v3 with: diff --git a/.github/workflows/snippet-tests.yml b/.github/workflows/snippet-tests.yml index 4d84ab70b..902deea85 100644 --- a/.github/workflows/snippet-tests.yml +++ b/.github/workflows/snippet-tests.yml @@ -5,17 +5,16 @@ name: Verify docs snippets # next to the snippet in markdown. Anything that drifts fails CI; anything # that errors out also fails CI. # -# Snippets in run-snippets.py's NO_EXPECTED allowlist are runnable but their -# stdout is unstable (real network traffic, timestamps), so they're verified -# to exit 0 without an output comparison. +# Snippets in run-snippets.php's NO_EXPECTED allowlist are runnable but +# their stdout is unstable (real network traffic, timestamps), so they're +# verified to exit 0 without an output comparison. on: pull_request: paths: - 'components/**' - - 'bin/_docs_components.py' - - 'bin/_load_catalog.py' - - 'bin/run-snippets.py' + - 'bin/build-reference.php' + - 'bin/run-snippets.php' - 'composer.json' - 'composer.lock' - '.github/workflows/snippet-tests.yml' @@ -36,13 +35,8 @@ jobs: tools: composer coverage: none - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install toolkit dependencies run: composer install --no-dev --optimize-autoloader --no-progress - name: Run docs snippets and compare to expected output - run: bin/run-snippets.py --check + run: php bin/run-snippets.php --check diff --git a/README.md b/README.md index 217e20bcb..33778599a 100644 --- a/README.md +++ b/README.md @@ -133,11 +133,11 @@ composer lint-fix #### Building the docs site -The docs site under `docs/` is generated from `bin/_docs_components/.md`. To rebuild and preview locally: +The docs site under `docs/` is generated from each `components//README.md`. To rebuild and preview locally: ```sh bash bin/build-docs-bundle.sh # bundles toolkit + regenerates HTML -python3 bin/serve-docs.py # opens http://localhost:8787 +php bin/serve-docs.php # opens http://localhost:8787 ``` Snippets in the markdown sources run in CI on every PR (see `.github/workflows/snippet-tests.yml`) and in WordPress Playground from the live site. diff --git a/bin/_docs_components.py b/bin/_docs_components.py deleted file mode 100644 index 8a29ed7a3..000000000 --- a/bin/_docs_components.py +++ /dev/null @@ -1,231 +0,0 @@ -# Component catalog for the runnable docs site. -# -# Per-component content (lede, sections, snippets, credit callouts, -# see-also links, expected snippet outputs) is sourced from each -# components//README.md — see bin/_load_catalog.py for the format. -# The README *is* the catalog source: GitHub and Packagist render it as -# a normal README (frontmatter is hidden by GitHub's renderer); the -# build pipeline parses the frontmatter + snippet metadata blocks to -# generate the docs site and run snippets in CI. -# -# This file still owns the small global metadata that doesn't belong in any -# single component's markdown: the landing-page starter paths and the -# per-component mental-model guides used on the landing page. - -import os as _os -import sys as _sys - -_sys.path.insert(0, _os.path.dirname(_os.path.abspath(__file__))) - -from _load_catalog import load_components # noqa: E402 - -COMPONENTS = load_components() - - - -COMPONENT_GUIDES = { - 'html': { - 'mental_model': - '

Start with the tag processor when you need to change markup that WordPress already stored: add loading="lazy" to post images, make feed links absolute, or remove inline event handlers from pasted HTML. It scans forward and preserves every byte it does not touch.

' - '

Switch to the full processor when the browser tree matters. Use it to find images inside figures, walk heading depth, or return to a saved parent after inspecting child tags.

', - 'journey': ( - ('Rewrite one tag safely', 'Add image attributes without parsing a DOM or changing surrounding whitespace.'), - ('Protect real content', 'Rewrite relative links, remove script behavior, and add CSP nonces without clobbering author-provided attributes.'), - ('Use structure when you need it', 'Find figure images, print a heading outline, and use bookmarks to annotate a parent after scanning its children.'), - ), - }, - 'zip': { - 'mental_model': - '

Treat a ZIP as a small filesystem with a table of contents at the end. Read the central directory, open one entry stream, and copy that entry where it belongs.

' - '

Use ZipFilesystem when your code wants get_contents() and ls(). Use ZipEncoder and ZipDecoder when the archive format matters, such as an EPUB that must store mimetype first and uncompressed.

', - 'journey': ( - ('Open an archive as files', 'Read readme.txt through ZipFilesystem before touching entry headers.'), - ('Write a format with rules', 'Build an EPUB and make the first entry Stored, not Deflated.'), - ('Move archives through streams', 'Repack entries, reject ../ paths, and copy a remote ZIP entry into another filesystem without a manual byte loop.'), - ), - }, - 'bytestream': { - 'mental_model': - '

A read stream separates three actions: pull bytes, inspect the buffer, then consume the bytes you accepted. That pattern lets a parser wait for a full line, a ZIP decoder wait for a complete header, or an HTTP client report progress without losing data.

' - '

Write streams make the destination boring. The caller writes chunks; the sink decides whether those bytes go to memory, a file, a compressor, or another component.

', - 'journey': ( - ('Read in chunks', 'Pull from memory and files with the same loop, then stop only when the stream reaches end-of-data.'), - ('Handle awkward boundaries', 'Read lines split across chunks and connect producers to consumers with MemoryPipe.'), - ('Add behavior around bytes', 'Wrap streams with gzip, hashing, limits, and windows while keeping the caller on the same interface.'), - ), - }, - 'filesystem': { - 'mental_model': - '

Write your tool against a filesystem object, not against the host machine. Tests can pass an in-memory tree, a CLI command can pass a local directory, and an importer can pass a ZIP-backed filesystem.

' - '

Every toolkit path uses forward slashes. A path such as wp-content/uploads/2026/logo.png means the same thing on macOS, Windows, Playground, and inside an archive.

', - 'journey': ( - ('Start in memory', 'Write and list files without touching disk, which makes examples and tests deterministic.'), - ('Move to a real backend', 'Use local, SQLite, and atomic-write examples to keep the same calling style while changing storage.'), - ('Copy between backends', 'Move a generated theme file from memory to disk, or from a ZIP archive into a local staging directory, through one helper.'), - ), - }, - 'blockparser': { - 'mental_model': - '

The parser turns serialized post content into the block array shape WordPress core returns. It does not render blocks, load block.json, or ask a registry whether a block exists.

' - '

Handle blockName === null first. A real post can contain a paragraph block, a custom block, and loose HTML before or after both.

', - 'journey': ( - ('Inspect the returned shape', 'Parse one paragraph block and read blockName, attrs, innerBlocks, innerHTML, and innerContent.'), - ('Walk the tree', 'Count nested blocks and find custom blocks without writing recursive boilerplate everywhere.'), - ('Ask editorial questions', 'Detect skipped heading levels, stale embeds, and blocks that need a migration before import.'), - ), - }, - 'markdown': { - 'mental_model': - '

Use Markdown for files that humans edit and block markup for content that WordPress stores. This component translates the supported middle ground: headings, paragraphs, lists, code blocks, links, images, and frontmatter-backed metadata.

' - '

Keep unsupported syntax visible. A migration tool should tell you that a file contains an unsupported table instead of silently dropping it before publishing.

', - 'journey': ( - ('Convert one document', 'Turn posts/launch.md into block markup and turn supported blocks back into readable Markdown.'), - ('Carry metadata beside content', 'Read frontmatter for title, slug, date, tags, and import hints.'), - ('Prepare a folder import', 'Map filenames to slugs, audit generated blocks, and hand the result to DataLiberation when you need WXR.'), - ), - }, - 'xml': { - 'mental_model': - '

XMLProcessor walks XML as a cursor. It reads the next tag, exposes attributes and text, records edits, and emits updated XML only when you call get_updated_xml().

' - '

Query namespaces by URI, not by prefix. In WXR, look for http://wordpress.org/export/1.2/ even when the source file writes the prefix as wp:.

', - 'journey': ( - ('Edit one attribute', 'Bump product prices and see how buffered updates keep untouched XML intact.'), - ('Read namespaced exports', 'Find WXR status fields and attachment URLs by namespace URI and local name.'), - ('Process export-sized files', 'Rewrite staging URLs and parse OPML without building a full in-memory tree.'), - ), - }, - 'encoding': { - 'mental_model': - '

Validate text with the Encoding helpers before a stricter parser sees unfamiliar bytes. A Latin-1 title from an old export, an overlong UTF-8 sequence in an upload, or a Unicode noncharacter can break XML, JSON, or a database write later in the pipeline — and the further downstream the failure happens, the harder it is to trace.

' - '

The component gives the same answer whether PHP has mbstring available or falls back to the pure-PHP scanner.

', - 'journey': ( - ('Reject invalid bytes', 'Separate clean UTF-8 from Latin-1 bytes, overlong forms, surrogate halves, and incomplete sequences.'), - ('Repair when content matters', 'Replace invalid bytes with U+FFFD when keeping the rest of a post title beats stopping the import.'), - ('Check downstream limits', 'Detect noncharacters before writing XML or handing text to a system with stricter Unicode rules.'), - ), - }, - 'dataliberation': { - 'mental_model': - '

Model a migration as a stream of WordPress-shaped entities. Read a post, rewrite its content and metadata, write it out, then move to the next entity.

' - '

The useful work happens between readers and writers: rewrite https://staging.example.test inside HTML, block attributes, CSS, GUIDs, and media URLs; download attachments; and keep enough state to resume after a failed request.

', - 'journey': ( - ('Write one entity', 'Create a WXR post record and read it back before building a site-sized pipeline.'), - ('Transform as you stream', 'Rewrite URLs on each entity without loading the whole export.'), - ('Compose a migration', 'Convert a Markdown folder, frontload media with HttpClient, and write WXR through XML and ByteStream layers.'), - ), - }, - 'git': { - 'mental_model': - '

Git stores snapshots as objects: blobs hold file bytes, trees hold directory listings, commits point at trees, and refs name commits.

' - '

This component keeps those objects visible. A browser-based editor can commit generated files, move refs/heads/main, expose a commit tree as a filesystem, and merge another branch without running the git binary.

', - 'journey': ( - ('Create a snapshot', 'Commit files into an in-memory repository and print the resulting object ID.'), - ('Read history by name', 'Resolve refs, walk parent commits, and mount a commit tree with GitFilesystem.'), - ('Coordinate edits', 'Create branches, merge content, and keep conflicts explicit for the caller.'), - ), - }, - 'merge': { - 'mental_model': - '

A three-way merge needs the common base, your version, and their version. The base tells the merger whether two lines changed independently or collided.

' - '

Start with line merges for Markdown, config files, and generated PHP. Move to a domain-specific differ only when lines hide the real unit of change.

', - 'journey': ( - ('See the edit', 'Generate a diff and patch so the merge inputs feel concrete.'), - ('Auto-merge independent lines', 'Combine two edits that touch different parts of the same file.'), - ('Surface conflicts', 'Return conflict records for a UI, CLI prompt, or sync log instead of guessing a winner.'), - ), - }, - 'httpclient': { - 'mental_model': - '

Make the first request boring: GET https://api.wordpress.org/plugins/info/1.2/, then read the response status and body. From there, add the details the workflow actually needs: a POST body, JSON headers, redirects, cache policy, or a chosen transport.

' - '

When the response becomes a file, keep it as a stream. A plugin installer can show progress while downloading a ZIP, resume a partial archive with Range, and hand the remote body to ZipFilesystem without first building a giant string.

', - 'journey': ( - ('Start with GET and POST', 'Fetch a URL, submit form data, and build a JSON request before touching lower-level objects.'), - ('Configure the request path', 'Choose a transport, follow redirects, cache responses, and report failures with useful context.'), - ('Scale the transfer', 'Show progress, keep ten media downloads active, resume a partial ZIP, and stream-unzip a remote archive through Filesystem helpers.'), - ), - }, - 'httpserver': { - 'mental_model': - '

Use HttpServer when a PHP tool needs one local endpoint. A CLI command can open http://127.0.0.1:8765/callback for an OAuth flow, serve fixture JSON to HttpClient tests, or expose a tiny status page during an import.

' - '

The server accepts a connection, parses one request, and gives your handler a response writer. Keep the process lifetime and shutdown rule in your command.

', - 'journey': ( - ('Serve one response', 'Bind to loopback and return text from a handler.'), - ('Route a small local API', 'Branch on method and path for /api/status and /api/echo.'), - ('Buffer when headers depend on the body', 'Use the buffered writer when the runtime needs the full response before sending headers.'), - ), - }, - 'corsproxy': { - 'mental_model': - '

A browser app cannot read https://api.github.com/repos/WordPress/php-toolkit unless GitHub sends CORS headers the app can use. A PHP proxy can fetch that URL server-side and return a controlled browser-readable response.

' - '

Deploy the proxy as a gate, not as an open tunnel. Allow api.github.com and raw.githubusercontent.com for a docs tool; reject private IP ranges, unknown hosts, oversized responses, and credential-bearing request headers.

', - 'journey': ( - ('See the proxy URL shape', 'Request /cors-proxy.php/https://api.github.com/repos/WordPress/php-toolkit from a local PHP server.'), - ('Lock down deployment', 'Add a rate limiter and a host allowlist before exposing the proxy.'), - ('Use it from the browser', 'Wrap fetch() once, then deploy the PHP script behind nginx or another SAPI.'), - ), - }, - 'cli': { - 'mental_model': - '

Define the command-line contract once, then parse argv against it. The parser returns positional arguments and named options; your application validates them and runs the command.

' - '

A command such as toolkit import posts/launch.md --site=demo --dry-run -vv should not need a console framework just to understand flags, values, and positionals.

', - 'journey': ( - ('Parse the smallest command', 'Read one boolean flag and one positional argument.'), - ('Accept normal shell shapes', 'Handle --port=8080, --port 8080, -p 8080, and bundled booleans such as -afv.'), - ('Build command behavior', 'Add required options, help output, and subcommand dispatch in application code.'), - ), - }, - 'polyfill': { - 'mental_model': - '

Load Polyfill when toolkit code runs outside WordPress but still calls WordPress-shaped helpers. Standalone tests can call esc_html(), add a filter, or use a translation stub without booting WordPress.

' - '

The component defines only missing functions. If WordPress or the current PHP runtime already provides a function, the polyfill leaves it alone.

', - 'journey': ( - ('Backfill missing PHP helpers', 'Use PHP 7.2-compatible helpers without dropping support for older runtimes.'), - ('Keep familiar WordPress calls', 'Escape output and keep translation-shaped call sites in standalone tools.'), - ('Expose extension points', 'Register filters and actions for library code that needs hooks outside WordPress.'), - ), - }, - 'blueprints': { - 'mental_model': - '

A Blueprint is a versioned recipe for a WordPress site. It can install Gutenberg, set permalink structure, import content, copy files, and run WP-CLI steps in a predictable order.

' - '

The runner supplies the environment: site root, site URL, execution mode, and filesystem access. The validator checks user-authored JSON before the runner mutates the target site.

', - 'journey': ( - ('Configure the target', 'Create a RunnerConfiguration with the site path, URL, and execution mode.'), - ('Generate repeatable recipes', 'Build JSON from PHP when tests or docs need a fresh site with the same plugins and options.'), - ('Validate before running', 'Catch misspelled step names and missing fields before installing packages or changing options.'), - ), - }, - 'coding-standards': { - 'mental_model': - '

Turn repeat review comments into PHPCS sniffs. If the project always rejects short ternaries, loose comparisons, or a confusing Yoda condition, the tool should report it before a reviewer does.

' - '

Keep each sniff narrow. A useful sniff names the risky pattern and shows the replacement code shape contributors should write.

', - 'journey': ( - ('Enable the ruleset', 'Point PHPCS at the toolkit standard from a component or CI job.'), - ('Read the rule as review guidance', 'Learn why the Yoda and short-ternary sniffs exist instead of treating them as arbitrary style.'), - ('Write the explicit form', 'Replace compact syntax with code that stays clear on PHP 7.2 and across WordPress-style projects.'), - ), - }, -} - - -STARTER_PATHS = ( - ( - 'Content and migration', - 'Start here when you are importing, exporting, rewriting, or auditing WordPress content.', - ('html', 'blockparser', 'markdown', 'xml', 'dataliberation'), - ), - ( - 'Streams and storage', - 'Use this path for archives, large files, testable storage backends, and pure-PHP file movement.', - ('bytestream', 'filesystem', 'zip', 'git', 'merge'), - ), - ( - 'Networked tools', - 'Use this path for clients, local fixture servers, browser-facing proxies, and CLI workflows.', - ('httpclient', 'httpserver', 'corsproxy', 'cli'), - ), - ( - 'WordPress runtime support', - 'Use this path when your code needs WordPress-shaped helpers, repeatable sites, or project-specific review rules.', - ('polyfill', 'blueprints', 'coding-standards'), - ), -) diff --git a/bin/_load_catalog.py b/bin/_load_catalog.py deleted file mode 100644 index a63a2f7f0..000000000 --- a/bin/_load_catalog.py +++ /dev/null @@ -1,389 +0,0 @@ -"""Loads each `components//README.md` into the COMPONENTS data -structure the build scripts and the snippet runner consume. - -The README *is* the catalog source: it doubles as the GitHub/Packagist -README and as the docs-site catalog. YAML-style frontmatter at the top -carries the slug/title/install/credit/see-also metadata; the body is -plain markdown with fenced PHP snippets and `` -fenced blocks. GitHub's renderer hides frontmatter from the README view -on github.com, so the metadata is invisible to readers but available to -the build pipeline. - -Markdown file format (one per component): - - --- - slug: - title: - install: <wp-php-toolkit/...> # optional - - credit_title: <one-line summary> # optional credit callout - credit_body: | - <multi-line HTML — one block per - indented line, joined verbatim> - - see_also: <slug> | <Title> | <reason> # optional, repeatable - see_also: <slug> | <Title> | <reason> - --- - - <lede HTML> - - ## Section heading - - <body HTML — paragraphs separated by blank lines> - - <!-- snippet: - filename: <name>.php - runnable: true | false # default: true - --> - ```php - <?php - require '...'; - ... - ``` - - <!-- expected-output --> - ``` - <verbatim expected stdout> - ``` - -The php fence holds the snippet verbatim. The optional expected-output -fence (immediately after the php fence, with the `<!-- expected-output -->` -marker on its own line) carries the captured stdout used by the docs site -for instant pre-render and by run-snippets.py for CI verification. - -The loader exposes both a richer dict-shape catalog and a legacy -COMPONENTS list of (slug, title, lede, install, sections) tuples — the -latter for backward compatibility with build-reference.py call sites that -existed before this refactor. -""" - -import os -import re - -THIS = os.path.dirname(os.path.abspath(__file__)) -ROOT = os.path.dirname(THIS) -COMPONENTS_ROOT = os.path.join(ROOT, 'components') - -# Slug → component-directory mapping. Each component's README.md *is* the -# catalog source: it carries the YAML-style frontmatter, lede, sections, -# snippets, and expected-output blocks the docs site needs. The ordered -# tuple here also defines the order components appear on the landing page -# and in the reference sidebar. -COMPONENT_ORDER = ( - ('html', 'HTML'), - ('zip', 'Zip'), - ('bytestream', 'ByteStream'), - ('filesystem', 'Filesystem'), - ('blockparser', 'BlockParser'), - ('markdown', 'Markdown'), - ('xml', 'XML'), - ('encoding', 'Encoding'), - ('dataliberation', 'DataLiberation'), - ('git', 'Git'), - ('merge', 'Merge'), - ('httpclient', 'HttpClient'), - ('httpserver', 'HttpServer'), - ('corsproxy', 'CORSProxy'), - ('cli', 'CLI'), - ('polyfill', 'Polyfill'), - ('blueprints', 'Blueprints'), - ('coding-standards', 'ToolkitCodingStandards'), -) - -_FRONTMATTER_RE = re.compile(r'\A---\n(.*?)\n---\n?', re.DOTALL) -_SNIPPET_RE = re.compile( - # Snippet metadata + ```php fence. The fence is a backreference so the - # closing run matches the opening run exactly (snippets that contain a - # literal triple-backtick are extracted with a 4-tick fence). - # Optionally followed by an `<!-- expected-output -->` marker and a - # second fence holding the captured stdout. - r'<!--\s*snippet:\s*\n(?P<meta>.*?)\n-->\s*\n(?P<fence>`{3,})php\n(?P<code>.*?)\n(?P=fence)' - r'(?:\s*\n\s*<!--\s*expected-output\s*-->\s*\n(?P<exp_fence>`{3,})\w*\n(?P<expected>.*?)\n(?P=exp_fence))?', - re.DOTALL, -) - - -def _parse_frontmatter(text): - """Parse a small YAML-subset frontmatter block. - - Supported shapes: - - ``key: value`` on a single line (string value). - - ``key: |`` followed by indented continuation lines (multi-line - string; indentation is stripped). - - Repeated ``key: value`` lines for the same key (list, in source - order). - """ - m = _FRONTMATTER_RE.match(text) - if not m: - raise ValueError('Missing YAML-style frontmatter (--- ... ---)') - fields = {} - lines = m.group(1).splitlines() - i = 0 - while i < len(lines): - line = lines[i] - if not line.strip(): - i += 1 - continue - if ':' not in line: - raise ValueError(f'Bad frontmatter line: {line!r}') - key, _, val = line.partition(':') - key = key.strip() - val = val.strip() - - if val == '|': - # Multi-line block: collect indented lines that follow. - block = [] - i += 1 - while i < len(lines): - nxt = lines[i] - if nxt.startswith(' ') or nxt.startswith('\t'): - block.append(nxt[2:] if nxt.startswith(' ') else nxt[1:]) - i += 1 - elif not nxt.strip(): - block.append('') - i += 1 - else: - break - value = '\n'.join(block).rstrip('\n') - else: - value = val - i += 1 - - if key in fields: - existing = fields[key] - if isinstance(existing, list): - existing.append(value) - else: - fields[key] = [existing, value] - else: - fields[key] = value - return fields, text[m.end():] - - -def _split_sections(body): - """Split a markdown body on H2 boundaries (`## Heading`) at column 0, - skipping `## ` lines that appear inside fenced code blocks (those are - snippet content or expected output, not section headings). - - Returns (lede, [(heading, content), ...]). - """ - lines = body.split('\n') - fence = None # current open fence string (e.g. "```", "````"), or None - boundaries = [] # (line_index, heading) for each H2 - fence_re = re.compile(r'^(?P<f>`{3,})') - h2_re = re.compile(r'^##\s+(.+?)\s*$') - for i, line in enumerate(lines): - m_fence = fence_re.match(line) - if m_fence: - f = m_fence.group('f') - if fence is None: - fence = f - elif len(f) >= len(fence): - fence = None - continue - if fence is None: - m_h2 = h2_re.match(line) - if m_h2: - boundaries.append((i, m_h2.group(1).strip())) - - if not boundaries: - return body.strip(), [] - lede = '\n'.join(lines[:boundaries[0][0]]).strip() - sections = [] - for idx, (line_idx, heading) in enumerate(boundaries): - end = boundaries[idx + 1][0] if idx + 1 < len(boundaries) else len(lines) - content = '\n'.join(lines[line_idx + 1:end]) - sections.append((heading, content)) - return lede, sections - - -def _extract_snippet(content): - """Pull `<!-- snippet: ... -->\n```php ... ``` [+ expected-output]` out - of a section. Returns (body_html, snippet_or_None) where the snippet is - a dict with keys: filename, code, runnable, expected_output (or None).""" - m = _SNIPPET_RE.search(content) - if not m: - return content.strip(), None - - meta = {} - for line in m.group('meta').splitlines(): - line = line.strip() - if not line or ':' not in line: - continue - key, _, val = line.partition(':') - meta[key.strip()] = val.strip() - filename = meta.get('filename') - if not filename: - raise ValueError(f'Snippet missing filename: {m.group("meta")!r}') - runnable_str = meta.get('runnable', 'true').lower() - runnable = runnable_str not in ('false', 'no', '0') - code = m.group('code') - expected = m.group('expected') - - snippet = { - 'filename': filename, - 'code': code, - 'runnable': runnable, - 'expected_output': expected if expected is not None else None, - } - body = (content[:m.start()] + content[m.end():]).strip() - return body, snippet - - -def _join_blocks(text): - """Re-flatten the markdown body back into the single-string HTML shape - that the legacy Python catalog produced. - - Blocks separated by *blank lines* in the markdown file join with no - separator in the runtime string, except blank lines inside a - ``<pre>...</pre>`` span which are preserved verbatim so embedded code - samples round-trip. - """ - if not text: - return '' - blocks = [] - current = [] - pre_depth = 0 - for line in text.split('\n'): - is_blank = not line.strip() - if is_blank and pre_depth == 0: - if current: - blocks.append('\n'.join(current).strip('\n')) - current = [] - continue - opens = len(re.findall(r'<pre\b', line, re.IGNORECASE)) - closes = len(re.findall(r'</pre\s*>', line, re.IGNORECASE)) - pre_depth += opens - closes - if pre_depth < 0: - pre_depth = 0 - current.append(line) - if current: - blocks.append('\n'.join(current).strip('\n')) - return ''.join(b for b in blocks if b) - - -def _parse_see_also(value): - """Convert ``<target> | Title | reason`` lines into (href, title, reason). - - ``<target>`` is either a component slug (rendered as ``<slug>.html``) or - a relative URL / absolute URL passed through verbatim. The detection is - naive: anything containing ``/`` or ``.`` is treated as a URL, - everything else is a slug. This lets entries point at sibling reference - pages (``see_also: blockparser | BlockParser | …``) or at learn-path - tutorials (``see_also: ../learn/01-rewriting-html.html | Tutorial — … | …``). - """ - if value is None: - return [] - items = value if isinstance(value, list) else [value] - out = [] - for item in items: - if not item.strip(): - continue - parts = [p.strip() for p in item.split('|')] - if len(parts) != 3: - raise ValueError(f'see_also must have three pipe-separated fields, got {item!r}') - target, title, reason = parts - if '/' in target or '.' in target: - href = target - else: - href = f'{target}.html' - out.append((href, title, reason)) - return out - - -def _legacy_snippet_tuple(snippet): - """Convert the rich snippet dict into the legacy tuple shape.""" - if snippet is None: - return None - if snippet['runnable']: - return (snippet['filename'], snippet['code']) - return (snippet['filename'], snippet['code'], False) - - -def load_components_rich(): - """Return per-component dicts with all metadata. Preferred for new code. - - Schema: - [ - { - 'slug': 'html', - 'title': 'HTML', - 'install': 'wp-php-toolkit/html', - 'lede': '<HTML lede>', - 'credit': ('Ported from WordPress core', '<HTML body>') | None, - 'see_also': [(slug, title, reason), ...], - 'sections': [ - { - 'heading': 'A minimal example', - 'body': '<HTML body>', - 'snippet': {'filename', 'code', 'runnable', 'expected_output'} | None, - }, - ... - ], - }, - ... - ] - """ - components = [] - for slug, dir_name in COMPONENT_ORDER: - path = os.path.join(COMPONENTS_ROOT, dir_name, 'README.md') - with open(path, encoding='utf-8') as f: - text = f.read() - fields, body = _parse_frontmatter(text) - if fields.get('slug') != slug: - raise ValueError( - f'{path}: frontmatter slug ({fields.get("slug")!r}) != filename ({slug!r})' - ) - title = fields.get('title') - if not title: - raise ValueError(f'{path}: missing title') - install = fields.get('install') or None - - credit = None - if fields.get('credit_title') or fields.get('credit_body'): - credit_title = fields.get('credit_title') or '' - credit_body = fields.get('credit_body') or '' - credit = (credit_title, credit_body) - - see_also = _parse_see_also(fields.get('see_also')) - - lede_md, raw_sections = _split_sections(body) - lede = _join_blocks(lede_md) - - sections = [] - for heading, content in raw_sections: - body_md, snippet = _extract_snippet(content) - body_html = _join_blocks(body_md) - sections.append({ - 'heading': heading, - 'body': body_html, - 'snippet': snippet, - }) - - components.append({ - 'slug': slug, - 'title': title, - 'install': install, - 'lede': lede, - 'credit': credit, - 'see_also': see_also, - 'sections': sections, - }) - - return components - - -def load_components(): - """Backward-compatible loader returning the legacy tuple shape. - - [(slug, title, lede, install, sections), ...] - sections = [(heading, body, snippet_tuple_or_None), ...] - """ - out = [] - for c in load_components_rich(): - sections = [ - (s['heading'], s['body'], _legacy_snippet_tuple(s['snippet'])) - for s in c['sections'] - ] - out.append((c['slug'], c['title'], c['lede'], c['install'], sections)) - return out diff --git a/bin/build-docs-bundle.sh b/bin/build-docs-bundle.sh index 1229fa147..59bb5991c 100755 --- a/bin/build-docs-bundle.sh +++ b/bin/build-docs-bundle.sh @@ -15,6 +15,6 @@ zip -qr docs/assets/php-toolkit.zip components vendor bootstrap.php composer.jso -x "*/Tests/*" "*/tests/*" "*/.git/*" "*/.github/*" "*/node_modules/*" echo "==> regenerating docs/reference/*.html from markdown" -python3 bin/build-reference.py +php bin/build-reference.php echo "Done. docs/assets/php-toolkit.zip = $(du -h docs/assets/php-toolkit.zip | cut -f1)" diff --git a/bin/build-reference.php b/bin/build-reference.php new file mode 100755 index 000000000..2736ed26f --- /dev/null +++ b/bin/build-reference.php @@ -0,0 +1,797 @@ +<?php +/** + * Generates docs/reference/<slug>.html for every component. + * + * The catalog comes from each components/<Name>/README.md — frontmatter + + * lede + sections + snippets + expected-output fences. + * + * Parsing: webuni/front-matter peels off the YAML frontmatter and + * league/commonmark parses the body into an AST. We walk the AST to + * pick out section boundaries (H2 headings), pitfall callouts (raw + * HTML blocks beginning with `<p>Footgun:` / `<p>Gotcha:`) and snippet + * triples (HTML comment + `php` fence + optional `<!-- expected-output -->` + * + plain fence). Both libraries are already vendored under + * components/Markdown/vendor-patched/ for the Markdown component. + */ + +declare(strict_types=1); + +namespace WordPress\Toolkit\DocsBuild; + +use League\CommonMark\Environment\Environment; +use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension; +use League\CommonMark\Extension\CommonMark\Node\Block\FencedCode; +use League\CommonMark\Extension\CommonMark\Node\Block\Heading; +use League\CommonMark\Extension\CommonMark\Node\Block\HtmlBlock; +use League\CommonMark\Node\Block\Document; +use League\CommonMark\Node\Block\Paragraph; +use League\CommonMark\Node\Inline\Text; +use League\CommonMark\Node\Node; +use League\CommonMark\Parser\MarkdownParser; +use League\CommonMark\Renderer\HtmlRenderer; + +if ( ! is_file( __DIR__ . '/../vendor/autoload.php' ) ) { + fwrite( STDERR, "Run `composer install` first.\n" ); + exit( 2 ); +} +require __DIR__ . '/../vendor/autoload.php'; + +const ASSET_VERSION = '20260504-php-rewrite'; +const ROOT = __DIR__ . '/..'; +const COMPONENTS = ROOT . '/components'; +const DOCS = ROOT . '/docs/reference'; + +/** Slug → directory map (also defines docs-site ordering). */ +const COMPONENT_ORDER = array( + array( 'html', 'HTML' ), + array( 'zip', 'Zip' ), + array( 'bytestream', 'ByteStream' ), + array( 'filesystem', 'Filesystem' ), + array( 'blockparser', 'BlockParser' ), + array( 'markdown', 'Markdown' ), + array( 'xml', 'XML' ), + array( 'encoding', 'Encoding' ), + array( 'dataliberation', 'DataLiberation' ), + array( 'git', 'Git' ), + array( 'merge', 'Merge' ), + array( 'httpclient', 'HttpClient' ), + array( 'httpserver', 'HttpServer' ), + array( 'corsproxy', 'CORSProxy' ), + array( 'cli', 'CLI' ), + array( 'polyfill', 'Polyfill' ), + array( 'blueprints', 'Blueprints' ), + array( 'coding-standards', 'ToolkitCodingStandards' ), +); + +const PAGE_HEAD = '<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<title>{title} — PHP Toolkit reference + + + + + + + +
+ PHP Toolkit + +
+ +
+'; + +const PAGE_FOOT = ' +
+ + + + +'; + +/** @var Environment|null Lazy-initialized. */ +$GLOBALS['__cm_env'] = null; + +function commonmark_env(): Environment { + if ( null === $GLOBALS['__cm_env'] ) { + $env = new Environment( array() ); + $env->addExtension( new CommonMarkCoreExtension() ); + $GLOBALS['__cm_env'] = $env; + } + return $GLOBALS['__cm_env']; +} + +function render_nodes( array $nodes ): string { + $renderer = new HtmlRenderer( commonmark_env() ); + return rtrim( (string) $renderer->renderNodes( $nodes ) ); +} + +/** + * Render the lede as inline HTML, without an outer `

`. If the lede + * is a single Paragraph (the catalog convention), render its inline + * children directly. Otherwise fall back to rendering all nodes. + */ +function render_lede( array $nodes ): string { + $renderer = new HtmlRenderer( commonmark_env() ); + if ( 1 === count( $nodes ) && $nodes[0] instanceof Paragraph ) { + $inline = iterator_to_array( $nodes[0]->children() ); + return rtrim( (string) $renderer->renderNodes( $inline ) ); + } + return rtrim( (string) $renderer->renderNodes( $nodes ) ); +} + +/** + * Parse the markdown body into the catalog's structured shape: + * [ lede_html, [ ['heading'=>..., 'body'=>html, 'snippet'=>{...}|null, + * 'pitfalls'=>[html, ...]], ... ], document_pitfalls ] + * + * Snippet detection: an HtmlBlock whose literal begins with + * `` HtmlBlock plus a + * plain `FencedCode` after that captures the expected stdout. + * + * Pitfall detection: an HtmlBlock whose literal begins with + * `

Footgun:` or `

Gotcha:` (case-insensitive) is lifted out of + * its section into a separate pitfalls list. + */ +function parse_body( string $md ): array { + $parser = new MarkdownParser( commonmark_env() ); + /** @var Document $doc */ + $doc = $parser->parse( $md ); + + $children = iterator_to_array( $doc->children() ); + + // Find section boundaries (top-level H2 headings). + $boundaries = array(); + foreach ( $children as $idx => $node ) { + if ( $node instanceof Heading && 2 === $node->getLevel() ) { + $boundaries[] = $idx; + } + } + + // The lede is everything before the first H2. The catalog convention + // is a single Paragraph; render its inline children directly so the + // page template's

wrapper is the only paragraph. + $lede_nodes = array_slice( $children, 0, $boundaries ? $boundaries[0] : count( $children ) ); + $lede_html = render_lede( $lede_nodes ); + + $sections = array(); + foreach ( $boundaries as $i => $start ) { + $end = $boundaries[ $i + 1 ] ?? count( $children ); + /** @var Heading $heading_node */ + $heading_node = $children[ $start ]; + $heading_text = inline_text( $heading_node ); + $content_nodes = array_slice( $children, $start + 1, $end - $start - 1 ); + list( $body_nodes, $snippet, $pitfalls ) = classify_section_children( $content_nodes ); + $sections[] = array( + 'heading' => $heading_text, + 'body' => render_nodes( $body_nodes ), + 'snippet' => $snippet, + 'pitfalls' => $pitfalls, + ); + } + + return array( $lede_html, $sections ); +} + +/** + * Classify the children of a section into [body_nodes, snippet, pitfalls]. + * Each input is one of: snippet block, expected-output block, pitfall + * paragraph, plain content. + */ +function classify_section_children( array $nodes ): array { + $body = array(); + $pitfalls = array(); + $snippet = null; + $i = 0; + $n = count( $nodes ); + while ( $i < $n ) { + $node = $nodes[ $i ]; + $pitfall_inner = classify_pitfall_block( $node ); + if ( null !== $pitfall_inner ) { + $pitfalls[] = $pitfall_inner; + $i++; + continue; + } + if ( is_snippet_marker( $node ) && $i + 1 < $n && is_php_fence( $nodes[ $i + 1 ] ) ) { + $meta = parse_snippet_meta( $node->getLiteral() ); + $code = rtrim( $nodes[ $i + 1 ]->getLiteral(), "\n" ); + $expected = null; + $consumed = 2; + if ( $i + 3 < $n && is_expected_output_marker( $nodes[ $i + 2 ] ) && $nodes[ $i + 3 ] instanceof FencedCode ) { + $expected = rtrim( $nodes[ $i + 3 ]->getLiteral(), "\n" ); + $consumed = 4; + } + $snippet = array( + 'filename' => $meta['filename'] ?? '', + 'code' => $code, + 'runnable' => ! in_array( strtolower( $meta['runnable'] ?? 'true' ), array( 'false', 'no', '0' ), true ), + 'expected_output' => $expected, + ); + if ( '' === $snippet['filename'] ) { + throw new \RuntimeException( 'snippet missing filename in metadata' ); + } + $i += $consumed; + continue; + } + $body[] = $node; + $i++; + } + return array( $body, $snippet, $pitfalls ); +} + +function inline_text( Node $node ): string { + $parts = array(); + foreach ( $node->iterator() as $sub ) { + if ( $sub instanceof Text ) { + $parts[] = $sub->getLiteral(); + } + } + return trim( implode( '', $parts ) ); +} + +function is_snippet_marker( Node $node ): bool { + return $node instanceof HtmlBlock + && 0 === stripos( ltrim( $node->getLiteral() ), '` HTML comment + * into a key/value array. The comment delimiters are literal strings + * (CommonMark already gave us the whole HtmlBlock); strip them by + * length, then strip the leading `snippet:` label, then split each + * remaining line on its first `:`. + */ +function parse_snippet_meta( string $html_comment ): array { + $body = trim( $html_comment ); + if ( str_starts_with( $body, '' ) ) { + $body = trim( substr( $body, 4, -3 ) ); + } + if ( str_starts_with( strtolower( $body ), 'snippet:' ) ) { + $body = ltrim( substr( $body, strlen( 'snippet:' ) ) ); + } + $meta = array(); + foreach ( explode( "\n", $body ) as $line ) { + $line = trim( $line ); + if ( '' === $line || false === strpos( $line, ':' ) ) { + continue; + } + list( $k, $v ) = array_map( 'trim', explode( ':', $line, 2 ) ); + $meta[ $k ] = $v; + } + return $meta; +} + +function load_components(): array { + $front_matter = new \Webuni\FrontMatter\FrontMatter(); + $components = array(); + foreach ( COMPONENT_ORDER as $row ) { + list( $slug, $dir ) = $row; + $path = COMPONENTS . "/$dir/README.md"; + if ( ! is_file( $path ) ) { + throw new \RuntimeException( "missing README: $path" ); + } + $doc = $front_matter->parse( file_get_contents( $path ) ); + $fields = $doc->getData(); + + if ( ( $fields['slug'] ?? null ) !== $slug ) { + throw new \RuntimeException( "$path: frontmatter slug !== '$slug'" ); + } + $title = $fields['title'] ?? ''; + if ( '' === $title ) { + throw new \RuntimeException( "$path: missing title" ); + } + + $credit = null; + if ( ! empty( $fields['credit_title'] ) || ! empty( $fields['credit_body'] ) ) { + $credit = array( + $fields['credit_title'] ?? '', + $fields['credit_body'] ?? '', + ); + } + $see_also = parse_see_also( $fields['see_also'] ?? null ); + + list( $lede, $sections ) = parse_body( $doc->getContent() ); + + $components[] = array( + 'slug' => $slug, + 'title' => $title, + 'install' => $fields['install'] ?? null, + 'lede' => $lede, + 'credit' => $credit, + 'see_also' => $see_also, + 'sections' => $sections, + ); + } + return $components; +} + + +/** + * Convert see-also entries (each " | Title | reason") into + * (href, title, reason) triples. A target containing `/` or `.` is a + * verbatim URL; otherwise it's a slug rendered as `.html`. + */ +function parse_see_also( $value ): array { + if ( null === $value || '' === $value || array() === $value ) { + return array(); + } + $items = is_array( $value ) ? $value : array( $value ); + $out = array(); + foreach ( $items as $item ) { + $item = trim( $item ); + if ( '' === $item ) { + continue; + } + // Limit to 3 splits so a `|` inside the reason is preserved + // verbatim instead of breaking the parse. + $parts = array_map( 'trim', explode( '|', $item, 3 ) ); + if ( count( $parts ) !== 3 ) { + throw new \RuntimeException( "see_also must have three pipe-separated fields, got: $item" ); + } + list( $target, $title, $reason ) = $parts; + $href = ( false !== strpos( $target, '/' ) || false !== strpos( $target, '.' ) ) + ? $target + : "{$target}.html"; + $out[] = array( $href, $title, $reason ); + } + return $out; +} + +// --------------------------------------------------------------------- +// Renderer. +// +// Every HTML fragment we emit goes through WP_HTML_Processor (or the +// underlying WP_HTML_Tag_Processor): +// +// * Authored fragments — sidebar, headings, install line, see-also, +// snippet block, credit callout — are built by parsing a skeleton +// with WP_HTML_Tag_Processor and patching attributes / text-node +// content via set_attribute() / set_modifiable_text(). The +// processor handles attribute escaping and text-node entity +// encoding, so no concatenation of escaped values is needed. +// +// * Inputs we receive as HTML (lede, section bodies, credit body, +// pitfall inner) get parsed by WP_HTML_Processor::create_fragment() +// and re-emitted via ::serialize(). That validates and normalizes +// them through the tokenizer rather than embedding them +// verbatim — unclosed tags get closed, attributes get the +// parser's quoting, etc. Nothing is "trusted" as a raw byte +// stream into the output. +// --------------------------------------------------------------------- + +/** + * Run an HTML fragment through WP_HTML_Processor — parse it as a + * fragment, then serialize it back. The parse+serialize cycle + * normalizes whatever the caller hands us through the tokenizer + * (closing unclosed tags, normalizing attribute quoting, etc.). + */ +function normalize_fragment( string $html ): string { + $p = \WP_HTML_Processor::create_fragment( $html ); + return null !== $p ? $p->serialize() : ''; +} + +/** + * Slugify a heading into an anchor id. ASCII case-fold, then walk + * characters: alphanumeric / `_` / `-` survive verbatim, runs of + * whitespace collapse to a single dash, everything else is dropped. + * No regex. + */ +function slugify( string $text ): string { + $text = mb_strtolower( $text ); + $out = ''; + $space_pending = false; + foreach ( mb_str_split( $text ) as $ch ) { + if ( ctype_alnum( $ch ) || '_' === $ch || '-' === $ch ) { + if ( $space_pending && '' !== $out ) { + $out .= '-'; + } + $out .= $ch; + $space_pending = false; + } elseif ( ctype_space( $ch ) ) { + $space_pending = true; + } + // Anything else (punctuation, symbols) is dropped, matching the + // older `[^\w\s-]` strip. + } + return $out; +} + +/** + * Render one snippet's block by patching attributes and + * text nodes on a skeleton via WP_HTML_Tag_Processor — no string + * concatenation of HTML, no manual entity escaping. set_attribute() + * handles attribute escaping; set_modifiable_text() entity-encodes + * text-node content (e.g. inside ) and writes script raw text + * verbatim (after the one bypass for embedded `next_tag() ) { + switch ( $p->get_tag() ) { + case 'PHP-SNIPPET': + $p->set_attribute( 'name', $snippet['filename'] ); + if ( ! $snippet['runnable'] ) { + $p->set_attribute( 'runnable', 'false' ); + } + break; + case 'CODE': + $p->set_modifiable_text( rtrim( $snippet['code'] ) ); + break; + case 'SCRIPT': + $type = $p->get_attribute( 'type' ); + if ( 'application/x-php' === $type ) { + $payload = rtrim( $snippet['code'] ); + } elseif ( 'text/expected-output' === $type && $has_output ) { + $payload = rtrim( $expected ); + } else { + break; + } + // WP_HTML_Tag_Processor::set_modifiable_text() refuses any + // content that contains `set_modifiable_text( $payload ); + break; + } + } + return $p->get_updated_html() . "\n"; +} + +/** + * Build the sidebar by parsing a skeleton with one `

  • x
  • ` + * per component, then walking and patching: set href on each ``, + * write the component title into the ``'s text node, mark the + * current component's `
  • ` with `class="current"`. Tag_Processor + * handles attribute and text escaping. No concat with escaped values. + */ +function sidebar( array $components, string $current_slug ): string { + $skeleton = ''; + + $p = new \WP_HTML_Tag_Processor( $skeleton ); + $idx = 0; + $awaiting_link_text = false; + while ( $p->next_token() ) { + $type = $p->get_token_type(); + if ( '#tag' === $type && ! $p->is_tag_closer() ) { + $tag = $p->get_tag(); + if ( 'LI' === $tag ) { + if ( $components[ $idx ]['slug'] === $current_slug ) { + $p->add_class( 'current' ); + } + } elseif ( 'A' === $tag ) { + $p->set_attribute( 'href', $components[ $idx ]['slug'] . '.html' ); + $awaiting_link_text = true; + } + } elseif ( '#text' === $type && $awaiting_link_text ) { + $p->set_modifiable_text( $components[ $idx ]['title'] ); + $awaiting_link_text = false; + ++$idx; + } + } + return $p->get_updated_html(); +} + +/** Build the page + opening header by patching PAGE_HEAD's + * `` text and `<meta name="description">` content via + * Tag_Processor. The asset-version placeholder is a plain URL slot + * (no HTML escaping), so a simple str_replace is fine for it. + */ +function build_page_head( string $title_text, string $description ): string { + $head = str_replace( '{asset_version}', ASSET_VERSION, PAGE_HEAD ); + $p = new \WP_HTML_Tag_Processor( $head ); + while ( $p->next_token() ) { + if ( '#tag' !== $p->get_token_type() || $p->is_tag_closer() ) { + continue; + } + $tag = $p->get_tag(); + if ( 'TITLE' === $tag ) { + $p->set_modifiable_text( $title_text . ' — PHP Toolkit reference' ); + } elseif ( 'META' === $tag && 'description' === $p->get_attribute( 'name' ) ) { + $p->set_attribute( 'content', $description ); + } + } + return $p->get_updated_html(); +} + +/** Build a heading via Tag_Processor: skeleton + navigate to the tag, + * set its id, then walk to the inner text node and replace it. */ +function build_heading( string $tag, string $text, ?string $id = null ): string { + $lc = strtolower( $tag ); + $uc = strtoupper( $tag ); + $skeleton = "<{$lc} id=\"\">x</{$lc}>"; + $p = new \WP_HTML_Tag_Processor( $skeleton ); + while ( $p->next_token() ) { + $type = $p->get_token_type(); + if ( '#tag' === $type && ! $p->is_tag_closer() && $uc === $p->get_tag() ) { + $p->set_attribute( 'id', $id ?? slugify( $text ) ); + } elseif ( '#text' === $type ) { + $p->set_modifiable_text( $text ); + } + } + return $p->get_updated_html(); +} + +/** Build `<pre><code class="install">composer require <pkg></code></pre>` + * by patching the inner <code>'s text node. */ +function build_install_block( string $package ): string { + $p = new \WP_HTML_Tag_Processor( '<pre><code class="install">x</code></pre>' ); + while ( $p->next_token() ) { + if ( '#text' === $p->get_token_type() ) { + $p->set_modifiable_text( "composer require {$package}" ); + break; + } + } + return $p->get_updated_html(); +} + +/** + * Build the credit callout. The lead title becomes a bold sentence; + * the body is HTML received from the README's frontmatter. The full + * fragment (lead + body) is parsed by WP_HTML_Processor and walked: + * we navigate to the <strong>'s inner text node and write the title + * via set_modifiable_text, then ::serialize() emits the result. The + * body HTML is normalized by the parser; nothing is concat-as-bytes + * into the output. + */ +function build_credit_block( string $title_text, string $body_html ): string { + // Tag the lead <strong> with a sentinel attribute so we know which + // one to patch even when the body HTML contains its own <strong> + // tags. The attribute gets removed before serialization. + $fragment = '<aside class="callout credit"><strong data-lead>x</strong> ' . $body_html . '</aside>'; + $p = \WP_HTML_Processor::create_fragment( $fragment ); + if ( null === $p ) { + return ''; + } + $awaiting_lead_text = false; + while ( $p->next_token() ) { + $type = $p->get_token_type(); + if ( '#tag' === $type && ! $p->is_tag_closer() && 'STRONG' === $p->get_tag() && null !== $p->get_attribute( 'data-lead' ) ) { + $p->remove_attribute( 'data-lead' ); + $awaiting_lead_text = true; + } elseif ( '#text' === $type && $awaiting_lead_text ) { + $p->set_modifiable_text( $title_text . '.' ); + break; + } + } + // WP_HTML_Processor::serialize() refuses once the cursor has + // advanced; get_updated_html() (inherited from Tag_Processor) + // returns the byte-edited source with our text-node patch applied. + return $p->get_updated_html(); +} + +/** + * Build the see-also list. Each entry becomes + * <li><a href="..."><strong>Title</strong></a> <span>Reason</span></li> + * Skeleton + navigate-and-set, no concat-with-escape. + */ +function build_see_also( array $see_also ): string { + $skeleton = '<ul class="related-components">'; + foreach ( $see_also as $unused ) { + $skeleton .= '<li><a href=""><strong>x</strong></a><span>x</span></li>'; + } + $skeleton .= '</ul>'; + + $p = new \WP_HTML_Tag_Processor( $skeleton ); + $idx = 0; + $slot = null; // 'STRONG' or 'SPAN' + while ( $p->next_token() ) { + $type = $p->get_token_type(); + if ( '#tag' === $type && ! $p->is_tag_closer() ) { + $tag = $p->get_tag(); + if ( 'A' === $tag ) { + $p->set_attribute( 'href', $see_also[ $idx ][0] ); + } elseif ( 'STRONG' === $tag || 'SPAN' === $tag ) { + $slot = $tag; + } + } elseif ( '#text' === $type && null !== $slot ) { + if ( 'STRONG' === $slot ) { + $p->set_modifiable_text( $see_also[ $idx ][1] ); + } else { // 'SPAN' + $p->set_modifiable_text( $see_also[ $idx ][2] ); + ++$idx; + } + $slot = null; + } + } + return $p->get_updated_html(); +} + +function render_component( array $components, array $c ): string { + $pitfalls = array(); + $sections = $c['sections']; + + $purpose_html = ''; + $usage = $sections; + if ( $sections && strtolower( $sections[0]['heading'] ) === 'why this exists' ) { + $purpose_html = $sections[0]['body']; + $pitfalls = array_merge( $pitfalls, $sections[0]['pitfalls'] ); + $usage = array_slice( $sections, 1 ); + } + + $lede_text = trim( strip_tags( $c['lede'] ) ); + + // Each builder returns a fully-formed HTML fragment built via + // Tag_Processor; the page is glued together from those fragments. + $pieces = array(); + $pieces[] = build_page_head( $c['title'], $lede_text ); + $pieces[] = sidebar( $components, $c['slug'] ); + $pieces[] = "\t<article class=\"content\">\n"; + + $pieces[] = build_heading( 'h1', $c['title'] ); + // The lede is HTML rendered by CommonMark from the README; wrap it + // inside <p class="lede"> and re-parse the whole fragment through + // WP_HTML_Processor, which closes any unclosed tags and normalizes + // the markup before it lands in the output. + $pieces[] = normalize_fragment( '<p class="lede">' . $c['lede'] . '</p>' ); + + if ( $c['install'] ) { + $pieces[] = build_install_block( (string) $c['install'] ); + } + if ( $c['credit'] ) { + list( $title_credit, $body_credit ) = $c['credit']; + $pieces[] = build_credit_block( $title_credit, $body_credit ); + } + + if ( $purpose_html ) { + $pieces[] = normalize_fragment( $purpose_html ); + } + + foreach ( $usage as $section ) { + $pitfalls = array_merge( $pitfalls, $section['pitfalls'] ); + $pieces[] = build_heading( 'h2', $section['heading'] ); + if ( $section['body'] ) { + $pieces[] = normalize_fragment( $section['body'] ); + } + if ( $section['snippet'] ) { + $pieces[] = snippet_block( $section['snippet'] ); + } + } + + if ( $pitfalls ) { + $pieces[] = build_heading( 'h2', 'Pitfalls', 'pitfalls' ); + foreach ( $pitfalls as $p ) { + $pieces[] = normalize_fragment( '<aside class="callout pitfall">' . $p . '</aside>' ); + } + } + + if ( $c['see_also'] ) { + $pieces[] = build_heading( 'h2', 'See also', 'see-also' ); + $pieces[] = build_see_also( $c['see_also'] ); + } + + $pieces[] = PAGE_FOOT; + return implode( "\n\n", $pieces ); +} + +function build_reference_main(): void { + if ( ! is_dir( DOCS ) ) { + mkdir( DOCS, 0755, true ); + } + $components = load_components(); + foreach ( $components as $c ) { + $path = DOCS . '/' . $c['slug'] . '.html'; + file_put_contents( $path, render_component( $components, $c ) ); + echo 'wrote reference/' . $c['slug'] . ".html\n"; + } +} + +// Only run as a script when invoked directly. run-snippets.php requires +// this file purely to reuse load_components() and the constants. +if ( isset( $_SERVER['SCRIPT_FILENAME'] ) && realpath( $_SERVER['SCRIPT_FILENAME'] ) === __FILE__ ) { + build_reference_main(); +} diff --git a/bin/build-reference.py b/bin/build-reference.py deleted file mode 100644 index b79c59d54..000000000 --- a/bin/build-reference.py +++ /dev/null @@ -1,235 +0,0 @@ -#!/usr/bin/env python3 -"""Generates docs/reference/<slug>.html for every component. - -The catalog comes from components/<Name>/README.md (loaded via -bin/_load_catalog.py). Each README *is* the catalog source — frontmatter -+ lede + sections + snippets + expected-output fences. Every page uses -the same concept-guide shape: lede + install + context paragraphs + -minimal example + refinements + pitfalls + see also. There are no -hand-authored exceptions. -""" - -import os -import re -import sys -from html import escape as h - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from _load_catalog import load_components_rich - -DOCS = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'docs', 'reference') -ASSET_VERSION = '20260503-fallback-explicit-hide' - - -PAGE_HEAD = '''<!doctype html> -<html lang="en"> -<head> -<meta charset="utf-8"> -<meta name="viewport" content="width=device-width, initial-scale=1"> -<title>{title} — PHP Toolkit reference - - - - - - - -
    -\tPHP Toolkit -\t -
    - -
    -''' - -PAGE_FOOT = '''\t -
    - - - - -''' - - -def slugify(text): - return re.sub(r'[^\w\s-]', '', text.lower()).strip().replace(' ', '-') - - -def split_pitfalls(body_html): - """Pull paragraphs that begin with 'Footgun:' or 'Gotcha:' out of a body - and return them as separate pitfall callouts. - - Returns ``(rest_html, [pitfall_html, ...])`` where ``rest_html`` is the - original body with only the matching ``

    ...

    `` chunks removed — - tables, lists, ``
    `` blocks, and any other markup are preserved
    -    verbatim. Earlier versions accidentally dropped non-``

    `` content - because they walked the body via a ``

    `` regex and re-emitted only - the matched chunks. - """ - pitfalls = [] - def replace(match): - chunk = match.group(0) - plain = re.sub(r'<[^>]+>', '', chunk).strip() - if plain.lower().startswith(('footgun', 'gotcha')): - inner = chunk[3:-4] # strip

    ...

    - inner = re.sub(r'^(Footgun|Gotcha)[^<]*\s*[—:.\s]*', '', inner) - inner = re.sub(r'^(Footgun|Gotcha)[^a-z<]*', '', inner) - pitfalls.append(inner.strip()) - return '' - return chunk - rest = re.sub(r'

    .*?

    ', replace, body_html, flags=re.DOTALL) - return rest, pitfalls - - -def snippet_block(snippet): - """Render a snippet dict as a custom-element block. - - Includes the captured expected-output (when present) so the docs page - paints the result before WordPress Playground finishes booting, and - emits a static
     fallback inside the same element so readers
    -    see the snippet code even if Playground's JS module fails to load
    -    (cross-origin block, slow network, adblocker, no-JS clients).
    -
    -    CSS hides the fallback when the custom element is :defined, so the
    -    interactive widget owns the screen as soon as it registers.
    -    """
    -    name = snippet['filename']
    -    code = snippet['code']
    -    runnable = snippet['runnable']
    -    expected = snippet['expected_output'] if runnable else None
    -
    -    safe = code.rstrip().replace(''
    -        f'{h(code.rstrip())}'
    -        '
    \n' - ) - expected_block = '' - if expected is not None: - expected_safe = expected.rstrip().replace('\n{expected_safe}\n\n' - ) - runnable_attr = '' if runnable else ' runnable="false"' - return ( - f'\n' - f'{fallback}' - f'\n' - f'{expected_block}' - f'\n' - ) - - -def sidebar(components, current_slug): - items = [] - for c in components: - href = f'{c["slug"]}.html' - cls = ' class="current"' if c['slug'] == current_slug else '' - items.append(f'\t\t\t{h(c["title"])}
  • ') - return ( - '\t\n' - ) - - -def render_component(components, c): - # Separate the "Why this exists" intro from the worked sections. - purpose_html = '' - pitfalls_from_purpose = [] - sections = c['sections'] - usage = sections - if sections and sections[0]['heading'].lower() == 'why this exists': - body = sections[0]['body'] or '' - purpose_html, pitfalls_from_purpose = split_pitfalls(body) - usage = sections[1:] - - out = [PAGE_HEAD.format( - title=h(c['title']), - description=h(re.sub(r'<[^>]+>', '', c['lede'])), - asset_version=ASSET_VERSION, - )] - out.append(sidebar(components, c['slug'])) - out.append('\t
    \n\n') - out.append(f'

    {h(c["title"])}

    \n\n') - out.append(f'

    {c["lede"]}

    \n\n') - if c['install']: - out.append(f'
    composer require {h(c["install"])}
    \n\n') - if c['credit']: - title_credit, body_credit = c['credit'] - out.append( - '\n\n' - ) - if purpose_html: - out.append(purpose_html + '\n\n') - - # Worked examples + accumulated pitfalls. - pitfalls = list(pitfalls_from_purpose) - minimal_emitted = False - for section in usage: - heading = section['heading'] - body_html = section['body'] or '' - snippet = section['snippet'] - rest, found = split_pitfalls(body_html) - pitfalls.extend(found) - # Use the section's own heading verbatim — no synthetic prefixes. - # The first section in a component that ships a snippet doubles as - # the "minimal example" by convention; readers can tell it from - # later sections by its position, not by an editorial label. - h2 = heading - if not minimal_emitted and snippet: - minimal_emitted = True - out.append(f'

    {h(h2)}

    \n\n') - if rest: - out.append(rest + '\n\n') - if snippet: - out.append(snippet_block(snippet) + '\n') - - if pitfalls: - out.append('

    Pitfalls

    \n\n') - for p in pitfalls: - out.append(f'\n\n') - - if c['see_also']: - out.append('

    See also

    \n\n') - out.append('\n\n') - - out.append(PAGE_FOOT) - return ''.join(out) - - -def main(): - os.makedirs(DOCS, exist_ok=True) - components = load_components_rich() - for c in components: - out = render_component(components, c) - path = os.path.join(DOCS, f'{c["slug"]}.html') - with open(path, 'w') as f: - f.write(out) - print(f'wrote reference/{c["slug"]}.html') - - -if __name__ == '__main__': - main() diff --git a/bin/run-snippets.php b/bin/run-snippets.php new file mode 100755 index 000000000..58df2d6cb --- /dev/null +++ b/bin/run-snippets.php @@ -0,0 +1,366 @@ +/README.md against + * the local toolkit and compares stdout to the captured expected-output + * block stored next to the snippet in markdown. + * + * php bin/run-snippets.php --check Verify expected outputs + * (default; used by CI). + * php bin/run-snippets.php --update Re-run runnable snippets + * and write captured stdout + * back into each README. + * php bin/run-snippets.php --filter foo Limit to snippets whose + * slug or filename contains + * `foo`. + * + * Snippets in NO_EXPECTED are runnable but their stdout is unstable + * (real network traffic, timestamps); they are required to exit 0 + * but their output is not pinned. + */ + +declare(strict_types=1); + +namespace WordPress\Toolkit\DocsBuild; + +if ( ! is_file( __DIR__ . '/../vendor/autoload.php' ) ) { + fwrite( STDERR, "Run `composer install` first.\n" ); + exit( 2 ); +} +require __DIR__ . '/../vendor/autoload.php'; +require __DIR__ . '/build-reference.php'; + +const VENDOR_AUTOLOAD = ROOT . '/vendor/autoload.php'; +const PLAYGROUND_AUTOLOAD = '/wordpress/wp-content/php-toolkit/vendor/autoload.php'; + +const NO_EXPECTED = array( + 'httpclient::get.php', + 'httpclient::post.php', + 'httpclient::progress.php', + 'httpclient::sliding-window.php', + 'httpclient::resume-download.php', + 'httpclient::stream-unzip.php', + 'httpclient::fan-out.php', + 'httpclient::stream-to-disk.php', +); + +const LOCAL_PRELUDE = " +if ( ! function_exists( 'parse_blocks' ) ) { +\tfunction parse_blocks( \$content ) { +\t\treturn ( new \\WP_Block_Parser() )->parse( \$content ); +\t} +} +"; + +function rewrite_for_local( string $code ): string { + $code = str_replace( PLAYGROUND_AUTOLOAD, VENDOR_AUTOLOAD, $code ); + if ( preg_match( "/require\s+'[^']*vendor\/autoload\.php';/", $code, $m, PREG_OFFSET_CAPTURE ) ) { + $insert_at = $m[0][1] + strlen( $m[0][0] ); + $code = substr( $code, 0, $insert_at ) . LOCAL_PRELUDE . substr( $code, $insert_at ); + } + return $code; +} + +/** + * Run a snippet under PHP and return [exit_code, stdout, stderr]. + */ +function run_php( string $code, int $timeout_seconds = 15 ): array { + $tmp = tempnam( sys_get_temp_dir(), 'snip' ) . '.php'; + file_put_contents( $tmp, rewrite_for_local( $code ) ); + $descriptors = array( + 0 => array( 'pipe', 'r' ), + 1 => array( 'pipe', 'w' ), + 2 => array( 'pipe', 'w' ), + ); + $proc = proc_open( + array( PHP_BINARY, '-d', 'display_errors=stderr', $tmp ), + $descriptors, + $pipes + ); + if ( ! is_resource( $proc ) ) { + @unlink( $tmp ); + return array( -1, '', 'failed to spawn php' ); + } + fclose( $pipes[0] ); + stream_set_blocking( $pipes[1], false ); + stream_set_blocking( $pipes[2], false ); + + $stdout = ''; + $stderr = ''; + $deadline = microtime( true ) + $timeout_seconds; + $timed_out = false; + $rc = -1; + while ( true ) { + $status = proc_get_status( $proc ); + $stdout .= stream_get_contents( $pipes[1] ); + $stderr .= stream_get_contents( $pipes[2] ); + if ( ! $status['running'] ) { + // proc_get_status reports the real exit code only on the first + // call where running flips to false; subsequent calls (and + // proc_close itself) return -1. Capture it here. + $rc = $status['exitcode']; + break; + } + if ( microtime( true ) > $deadline ) { + proc_terminate( $proc, 9 ); + $timed_out = true; + break; + } + usleep( 5000 ); + } + $stdout .= stream_get_contents( $pipes[1] ); + $stderr .= stream_get_contents( $pipes[2] ); + fclose( $pipes[1] ); + fclose( $pipes[2] ); + proc_close( $proc ); + @unlink( $tmp ); + if ( $timed_out ) { + return array( -1, $stdout, "TIMEOUT after {$timeout_seconds}s\n$stderr" ); + } + return array( $rc, $stdout, $stderr ); +} + +/** + * Strip noise that varies between runs (tempfile names, hashes, etc.). + * Patterns mirror the Python implementation 1:1 so existing + * expected-output captures continue to match. + */ +function normalize( string $text ): string { + $text = preg_replace( '#/tmp/\w+\.zip#', '/tmp/.zip', $text ) ?? $text; + $text = preg_replace( '#(/tmp/\w+)(\.epub|\.tmp\.[a-f0-9]+)?#', '/tmp/$2', $text ) ?? $text; + $text = preg_replace( '#sys_get_temp_dir\(\) \. \'/[^\']+#', "sys_get_temp_dir() . '/", $text ) ?? $text; + $text = preg_replace( '#/(toolkit|atomic|copytree|big|orig|repacked|app|book|demo|sample|hash|gz|dl)-[a-f0-9]+#', '/$1-XXXXXX', $text ) ?? $text; + $text = preg_replace_callback( + '/\bnonce(?:: |=")([0-9a-f]{16})"?/', + fn( array $m ) => str_replace( $m[1], '', $m[0] ), + $text + ) ?? $text; + $text = preg_replace( '/\bcommit: [0-9a-f]{40}\b/', 'commit: ', $text ) ?? $text; + $text = preg_replace( '/\bHEAD:\s+[0-9a-f]{40}/', 'HEAD: ', $text ) ?? $text; + $text = preg_replace( '/\boid: [0-9a-f]{40}\b/', 'oid: ', $text ) ?? $text; + $text = preg_replace( '/merge head: [0-9a-f]{40}/', 'merge head: ', $text ) ?? $text; + $text = preg_replace( '/\b[a-f0-9]{7} /', ' ', $text ) ?? $text; + $text = preg_replace( '/Peak memory: [\d.]+ MB/', 'Peak memory: MB', $text ) ?? $text; + return $text; +} + +function trim_trailing_newlines( string $s ): string { + return rtrim( $s, "\n" ); +} + +/** + * Replace (or insert) the captured-output fence for one snippet inside + * its component's README.md. + * + * Uses CommonMark to find the snippet's exact line range — no regex + * over the README. The README's body is parsed; we walk top-level + * children looking for the HtmlBlock whose snippet metadata names the + * given filename, take the next FencedCode (info=`php`) as the snippet + * code's last line, then look at the next two children for an existing + * expected-output marker + fence. We splice line-by-line. + */ +function write_expected_output( string $slug, string $filename, string $new_output ): void { + $dir_name = null; + foreach ( COMPONENT_ORDER as $row ) { + if ( $row[0] === $slug ) { + $dir_name = $row[1]; + break; + } + } + if ( ! $dir_name ) { + throw new \RuntimeException( "unknown slug: $slug" ); + } + $path = COMPONENTS . "/$dir_name/README.md"; + $text = file_get_contents( $path ); + + $front_matter = new \Webuni\FrontMatter\FrontMatter(); + $doc = $front_matter->parse( $text ); + // Lines in the body are line-1-indexed by CommonMark, but the body + // itself starts after the frontmatter in the file. Compute the + // offset so node line numbers map back to the original file. + $body = $doc->getContent(); + $body_offset = strpos( $text, $body ); + if ( false === $body_offset ) { + throw new \RuntimeException( "could not locate body in $path" ); + } + $prefix_lines = substr_count( substr( $text, 0, $body_offset ), "\n" ); + + $parser = new \League\CommonMark\Parser\MarkdownParser( commonmark_env() ); + $tree = $parser->parse( $body ); + $kids = iterator_to_array( $tree->children() ); + + // Find the snippet metadata HtmlBlock whose meta names this filename. + $snippet_idx = null; + foreach ( $kids as $idx => $node ) { + if ( ! $node instanceof \League\CommonMark\Extension\CommonMark\Node\Block\HtmlBlock ) { + continue; + } + if ( 0 !== stripos( ltrim( $node->getLiteral() ), '\n{$fence}\n" . rtrim( $new_output, "\n" ) . "\n{$fence}"; + + $lines = explode( "\n", $text ); + if ( $has_existing ) { + // Replace the existing marker + fence span with the new block. + $start_line = $exp_marker->getStartLine() - 1 + $prefix_lines; // 0-indexed + $end_line = $exp_fence->getEndLine() - 1 + $prefix_lines; + array_splice( + $lines, + $start_line, + $end_line - $start_line + 1, + explode( "\n", $expected_block ) + ); + } else { + // Insert after the php fence's last line, with a blank separator. + $insert_at = $php_fence->getEndLine() - 1 + $prefix_lines; + array_splice( + $lines, + $insert_at + 1, + 0, + array_merge( array( '' ), explode( "\n", $expected_block ) ) + ); + } + file_put_contents( $path, implode( "\n", $lines ) ); +} + + +function run_snippets_main( array $argv ): int { + $update = false; + $check = false; + $filter = null; + for ( $i = 1; $i < count( $argv ); $i++ ) { + switch ( $argv[ $i ] ) { + case '--update': + $update = true; + break; + case '--check': + $check = true; + break; + case '--filter': + $filter = $argv[ ++$i ] ?? null; + break; + default: + fwrite( STDERR, "unknown arg: {$argv[$i]}\n" ); + return 2; + } + } + if ( ! $update && ! $check ) { + $check = true; + } + if ( ! is_file( VENDOR_AUTOLOAD ) ) { + fwrite( STDERR, 'ERROR: ' . VENDOR_AUTOLOAD . " not found. Run composer install first.\n" ); + return 2; + } + + $components = load_components(); + $matched = 0; + $skipped = 0; + $drift = array(); + $failures = array(); + $pending_writes = array(); + + foreach ( $components as $c ) { + $slug = $c['slug']; + foreach ( $c['sections'] as $section ) { + $snippet = $section['snippet']; + if ( ! $snippet || ! $snippet['runnable'] ) { + continue; + } + $filename = $snippet['filename']; + if ( $filter !== null && false === strpos( $slug, $filter ) && false === strpos( $filename, $filter ) ) { + continue; + } + list( $rc, $stdout, $stderr ) = run_php( $snippet['code'] ); + if ( 0 !== $rc ) { + $lines = explode( "\n", trim( $stderr ) ); + $failures[] = array( $slug, $filename, array_slice( $lines, 0, 2 ) ); + $skipped++; + continue; + } + $key = "$slug::$filename"; + if ( in_array( $key, NO_EXPECTED, true ) ) { + $matched++; + continue; + } + + $normalized = normalize( $stdout ); + $expected = $snippet['expected_output']; + + if ( null === $expected ) { + $drift[] = array( $slug, $filename, 'NEW (run --update to capture)' ); + if ( $update ) { + $pending_writes[] = array( $slug, $filename, $normalized ); + } + continue; + } + $expected_norm = trim_trailing_newlines( normalize( $expected ) ); + $got_norm = trim_trailing_newlines( $normalized ); + if ( $expected_norm !== $got_norm ) { + $drift[] = array( $slug, $filename, 'OUTPUT CHANGED' ); + if ( $update ) { + $pending_writes[] = array( $slug, $filename, $normalized ); + } + continue; + } + $matched++; + } + } + + $total = $matched + count( $drift ); + echo "\nRan $total snippets; $skipped couldn't run locally.\n"; + foreach ( $failures as $f ) { + list( $slug, $filename, $why ) = $f; + $why_text = $why ? implode( ' ', $why ) : '(no stderr)'; + printf( " skip %-38s %s\n", "$slug/$filename", substr( $why_text, 0, 80 ) ); + } + if ( $check ) { + foreach ( $drift as $d ) { + list( $slug, $filename, $kind ) = $d; + printf( " DRIFT %-38s %s\n", "$slug/$filename", $kind ); + } + } + + if ( $update ) { + foreach ( $pending_writes as $w ) { + list( $slug, $filename, $new_output ) = $w; + write_expected_output( $slug, $filename, $new_output ); + echo " wrote $slug/$filename\n"; + } + printf( "\nUpdated %d expected-output blocks in markdown.\n", count( $pending_writes ) ); + return 0; + } + + if ( $drift ) { + printf( "\n%d snippet(s) drifted. Run `php bin/run-snippets.php --update` to refresh.\n", count( $drift ) ); + return 1; + } + echo "\nAll snippets match expected outputs.\n"; + return 0; +} + +exit( run_snippets_main( $argv ) ); diff --git a/bin/run-snippets.py b/bin/run-snippets.py deleted file mode 100755 index 0ff6af0f0..000000000 --- a/bin/run-snippets.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python3 -"""Runs every PHP snippet declared in components//README.md against -the local toolkit and compares stdout to the captured expected-output that -lives next to the snippet in markdown. Used in two ways: - - bin/run-snippets.py --update Re-run runnable snippets and write the - new stdout back into each markdown - file's expected-output fence. - bin/run-snippets.py --check Run every snippet, compare against the - committed expected output. Exit nonzero - on drift. Used by snippet-tests.yml. - -Snippets reference '/wordpress/wp-content/php-toolkit/vendor/autoload.php' — -the path that exists inside Playground. The runner rewrites that to the -repo's local vendor/autoload.php before executing. - -Snippets marked non-runnable in the catalog are skipped. Snippets in -NO_EXPECTED are runnable but their stdout is environment-dependent (real -network traffic, timestamps); they're verified to exit 0 and have no -captured expected output. -""" - -import argparse -import os -import re -import subprocess -import sys -import tempfile - -THIS = os.path.dirname(os.path.abspath(__file__)) -ROOT = os.path.dirname(THIS) -sys.path.insert(0, THIS) -from _load_catalog import load_components_rich # noqa: E402 - -VENDOR_AUTOLOAD = os.path.join(ROOT, 'vendor', 'autoload.php') -COMPONENTS_ROOT = os.path.join(ROOT, 'components') - -# Runnable snippets whose stdout is unstable. They exit 0 but their output -# is not pinned (real network traffic, timestamps, host-specific values). -NO_EXPECTED = { - ('httpclient', 'get.php'), - ('httpclient', 'post.php'), - ('httpclient', 'progress.php'), - ('httpclient', 'sliding-window.php'), - ('httpclient', 'resume-download.php'), - ('httpclient', 'stream-unzip.php'), - ('httpclient', 'fan-out.php'), - ('httpclient', 'stream-to-disk.php'), -} - -PLAYGROUND_AUTOLOAD = "/wordpress/wp-content/php-toolkit/vendor/autoload.php" - -# Tiny polyfill so WordPress-only globals don't break local runs. -LOCAL_PRELUDE = """ -if ( ! function_exists( 'parse_blocks' ) ) { -\tfunction parse_blocks( $content ) { -\t\treturn ( new WP_Block_Parser() )->parse( $content ); -\t} -} -""" - - -def rewrite(code): - code = code.replace(PLAYGROUND_AUTOLOAD, VENDOR_AUTOLOAD) - match = re.search(r"require\s+'[^']*vendor/autoload\.php';", code) - if match: - insert_at = match.end() - code = code[:insert_at] + LOCAL_PRELUDE + code[insert_at:] - return code - - -def run_one(code, timeout=15): - with tempfile.NamedTemporaryFile(suffix='.php', mode='w', delete=False) as f: - f.write(rewrite(code)) - path = f.name - try: - proc = subprocess.run( - ['php', '-d', 'display_errors=stderr', path], - capture_output=True, text=True, timeout=timeout, - ) - return proc.returncode, proc.stdout, proc.stderr - except subprocess.TimeoutExpired: - return -1, '', f'TIMEOUT after {timeout}s' - finally: - try: - os.unlink(path) - except OSError: - pass - - -def normalize(text): - """Strip noise that varies between runs (tempfile names, hashes, etc.).""" - text = re.sub(r'/tmp/\w+\.zip', '/tmp/.zip', text) - text = re.sub(r'(/tmp/\w+)(\.epub|\.tmp\.[a-f0-9]+)?', r'/tmp/\2', text) - text = re.sub(r'sys_get_temp_dir\(\) \. \'/[^\']+', "sys_get_temp_dir() . '/", text) - text = re.sub(r'/(toolkit|atomic|copytree|big|orig|repacked|app|book|demo|sample|hash|gz|dl)-[a-f0-9]+', r'/\1-XXXXXX', text) - text = re.sub(r'\bnonce(?:: |=")([0-9a-f]{16})"?', lambda m: m.group(0).replace(m.group(1), ''), text) - text = re.sub(r'\bcommit: [0-9a-f]{40}\b', 'commit: ', text) - text = re.sub(r'\bHEAD:\s+[0-9a-f]{40}', 'HEAD: ', text) - text = re.sub(r'\boid: [0-9a-f]{40}\b', 'oid: ', text) - text = re.sub(r'merge head: [0-9a-f]{40}', 'merge head: ', text) - text = re.sub(r'\b[a-f0-9]{7} ', ' ', text) - text = re.sub(r'Peak memory: [\d.]+ MB', 'Peak memory: MB', text) - return text - - -def write_expected_output(slug, filename, new_output): - """Write a new captured stdout into the component's README.md, creating - or updating the snippet's `` fence.""" - from _load_catalog import COMPONENT_ORDER - dir_name = dict(COMPONENT_ORDER).get(slug) - if not dir_name: - raise ValueError(f'Unknown component slug: {slug}') - path = os.path.join(COMPONENTS_ROOT, dir_name, 'README.md') - with open(path, encoding='utf-8') as f: - text = f.read() - - # Match the snippet block whose metadata holds `filename: `. The - # filename is unique per component, so a non-greedy search anchored on - # `filename: ` is sufficient. - snippet_pattern = re.compile( - r'(\s*\n(?P`{3,})\w*\n.*?\n(?P=exp_fence))?', - re.DOTALL, - ) - m = snippet_pattern.search(text) - if not m: - raise RuntimeError(f'Could not locate snippet {slug}::{filename} in {path}') - - # Pick a fence longer than any backtick run inside the new output. - exp_fence = '```' - while exp_fence in new_output: - exp_fence += '`' - new_block = ( - m.group(1) + - f'\n\n\n{exp_fence}\n{new_output.rstrip(chr(10))}\n{exp_fence}' - ) - text = text[:m.start()] + new_block + text[m.end():] - with open(path, 'w', encoding='utf-8') as f: - f.write(text) - - -def main(): - ap = argparse.ArgumentParser() - ap.add_argument('--update', action='store_true', help='Write new stdout back into the markdown files') - ap.add_argument('--check', action='store_true', help='Verify against expected-output blocks (default)') - ap.add_argument('--filter', default=None, help='Only run snippets whose slug or filename match this substring') - args = ap.parse_args() - - if not args.update and not args.check: - args.check = True - - if not os.path.exists(VENDOR_AUTOLOAD): - print(f'ERROR: {VENDOR_AUTOLOAD} not found. Run `composer install` first.', file=sys.stderr) - sys.exit(2) - - components = load_components_rich() - - matched = 0 - skipped = 0 - drift = [] - failures = [] - pending_writes = [] # (slug, filename, new_output) - - for c in components: - slug = c['slug'] - for section in c['sections']: - snippet = section['snippet'] - if not snippet or not snippet['runnable']: - continue - filename = snippet['filename'] - if args.filter and args.filter not in slug and args.filter not in filename: - continue - rc, stdout, stderr = run_one(snippet['code']) - if rc != 0: - failures.append((slug, filename, (stderr or '').strip().splitlines()[:2])) - skipped += 1 - continue - - key = (slug, filename) - if key in NO_EXPECTED: - matched += 1 - continue - - normalized = normalize(stdout) - expected = snippet['expected_output'] - - # Fenced blocks in markdown don't capture the trailing newline - # before the closing fence, but stdout virtually always ends - # with one. Compare with trailing-newline normalization so the - # markdown round-trip doesn't trip on that convention. - def trim_trailing(s): - return s.rstrip('\n') - - if expected is None: - drift.append((slug, filename, 'NEW (run --update to capture)')) - if args.update: - pending_writes.append((slug, filename, normalized)) - elif trim_trailing(normalize(expected)) != trim_trailing(normalized): - drift.append((slug, filename, 'OUTPUT CHANGED')) - if args.update: - pending_writes.append((slug, filename, normalized)) - else: - matched += 1 - - print(f'\nRan {matched + len(drift)} snippets; {skipped} couldn\'t run locally.') - for slug, filename, why in failures: - why_text = ' '.join(why) if why else '(no stderr)' - print(f' skip {slug}/{filename:<32} {why_text[:80]}') - if args.check: - for slug, filename, kind in drift: - print(f' DRIFT {slug}/{filename:<32} {kind}') - - if args.update: - for slug, filename, new_output in pending_writes: - write_expected_output(slug, filename, new_output) - print(f' wrote {slug}/{filename}') - print(f'\nUpdated {len(pending_writes)} expected-output blocks in markdown.') - sys.exit(0) - - if drift: - print(f'\n{len(drift)} snippet(s) drifted. Run `bin/run-snippets.py --update` to refresh.') - sys.exit(1) - print('\nAll snippets match expected outputs.') - - -if __name__ == '__main__': - main() diff --git a/bin/serve-docs.php b/bin/serve-docs.php new file mode 100644 index 000000000..ce0c7e500 --- /dev/null +++ b/bin/serve-docs.php @@ -0,0 +1,57 @@ + 1 else 8787 -DOCS = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'docs') - - -class CorsHandler(http.server.SimpleHTTPRequestHandler): - def end_headers(self): - self.send_header('Access-Control-Allow-Origin', '*') - self.send_header('Access-Control-Allow-Headers', '*') - super().end_headers() - - -os.chdir(DOCS) - -# Reference pages and the Playground bundle are build artifacts (gitignored). -# Remind the user to regenerate them if they're missing. -missing = [] -if not os.path.exists('reference/html.html'): - missing.append('python3 bin/build-reference.py') -if not os.path.exists('assets/php-toolkit.zip'): - missing.append('bash bin/build-docs-bundle.sh') -if missing: - print('Missing build artifacts. Run from the repo root first:') - for cmd in missing: - print(f' {cmd}') - print() - -print(f'Serving {DOCS} on http://localhost:{PORT}/') -http.server.ThreadingHTTPServer(('', PORT), CorsHandler).serve_forever() diff --git a/components/BlockParser/README.md b/components/BlockParser/README.md index 95cb8c22b..0892524b4 100644 --- a/components/BlockParser/README.md +++ b/components/BlockParser/README.md @@ -7,9 +7,10 @@ credit_title: WordPress core, packaged standalone credit_body: | WP_Block_Parser is WordPress core's block parser, packaged here so importers and linters can read block markup without booting WordPress. Source: WordPress/wordpress-develop. -see_also: html | HTML | Inspect or rewrite the HTML carried by parsed blocks. -see_also: markdown | Markdown | Move between author-friendly Markdown and serialized block markup. -see_also: dataliberation | DataLiberation | Audit and transform blocks while migrating content. +see_also: + - html | HTML | Inspect or rewrite the HTML carried by parsed blocks. + - markdown | Markdown | Move between author-friendly Markdown and serialized block markup. + - dataliberation | DataLiberation | Audit and transform blocks while migrating content. --- WordPress core's block parser, packaged as a standalone library. Turn block markup into a structured tree, lint posts for common authoring mistakes, and audit block usage — all without booting WordPress. diff --git a/components/Blueprints/README.md b/components/Blueprints/README.md index 95bbe7ca5..ad72079ba 100644 --- a/components/Blueprints/README.md +++ b/components/Blueprints/README.md @@ -3,9 +3,10 @@ slug: blueprints title: Blueprints install: wp-php-toolkit/blueprints -see_also: filesystem | Filesystem | Prepare files and fixtures before applying site setup steps. -see_also: httpclient | HttpClient | Download packages or source data as part of provisioning workflows. -see_also: cli | CLI | Wrap repeatable blueprint operations in a small command. +see_also: + - filesystem | Filesystem | Prepare files and fixtures before applying site setup steps. + - httpclient | HttpClient | Download packages or source data as part of provisioning workflows. + - cli | CLI | Wrap repeatable blueprint operations in a small command. --- Declarative WordPress site provisioning. Write a JSON description of plugins, options, and content; let the runner execute it. diff --git a/components/ByteStream/README.md b/components/ByteStream/README.md index 0a40453c8..360c909a2 100644 --- a/components/ByteStream/README.md +++ b/components/ByteStream/README.md @@ -3,9 +3,10 @@ slug: bytestream title: ByteStream install: wp-php-toolkit/bytestream -see_also: filesystem | Filesystem | Back file reads and writes with the same stream primitives. -see_also: zip | Zip | Read and write archive entries one stream at a time. -see_also: httpclient | HttpClient | Process request and response bodies incrementally. +see_also: + - filesystem | Filesystem | Back file reads and writes with the same stream primitives. + - zip | Zip | Read and write archive entries one stream at a time. + - httpclient | HttpClient | Process request and response bodies incrementally. --- Composable streaming primitives for reading, writing, transforming, hashing, and compressing byte data. Pull/peek/consume semantics let parsers backtrack without copying, and deflate, inflate, and checksum filters snap together like Lego. diff --git a/components/CLI/README.md b/components/CLI/README.md index 017eeaf18..7b91f4224 100644 --- a/components/CLI/README.md +++ b/components/CLI/README.md @@ -3,9 +3,10 @@ slug: cli title: CLI install: wp-php-toolkit/cli -see_also: filesystem | Filesystem | Keep command behavior testable with in-memory storage. -see_also: blueprints | Blueprints | Build repeatable site setup commands around parsed options. -see_also: httpserver | HttpServer | Add a local web UI to a CLI workflow. +see_also: + - filesystem | Filesystem | Keep command behavior testable with in-memory storage. + - blueprints | Blueprints | Build repeatable site setup commands around parsed options. + - httpserver | HttpServer | Add a local web UI to a CLI workflow. --- POSIX-style argument parser. Long options, short bundles, inline values, positional args — one static call. diff --git a/components/CORSProxy/README.md b/components/CORSProxy/README.md index 770d2d164..cc6355b72 100644 --- a/components/CORSProxy/README.md +++ b/components/CORSProxy/README.md @@ -3,8 +3,9 @@ slug: corsproxy title: CORSProxy install: wp-php-toolkit/corsproxy -see_also: httpclient | HttpClient | Fetch upstream responses from PHP when browser CORS blocks direct access. -see_also: httpserver | HttpServer | Understand the local-server shape before deploying a proxy endpoint. +see_also: + - httpclient | HttpClient | Fetch upstream responses from PHP when browser CORS blocks direct access. + - httpserver | HttpServer | Understand the local-server shape before deploying a proxy endpoint. --- A small PHP CORS proxy intended for browser-side code that needs to reach servers without CORS headers. diff --git a/components/DataLiberation/README.md b/components/DataLiberation/README.md index 6dd8b02d8..3f9ba60b5 100644 --- a/components/DataLiberation/README.md +++ b/components/DataLiberation/README.md @@ -3,10 +3,11 @@ slug: dataliberation title: DataLiberation install: wp-php-toolkit/data-liberation -see_also: ../learn/03-importing-content.html | Tutorial — Markdown to WXR | The chapter that walks through importing a folder of Markdown files into WordPress via the toolkit. -see_also: markdown | Markdown | Use Markdown as a source or destination format. -see_also: blockparser | BlockParser | Analyze serialized blocks inside post content. -see_also: httpclient | HttpClient | Download media and remote source data while importing. +see_also: + - ../learn/03-importing-content.html | Tutorial — Markdown to WXR | The chapter that walks through importing a folder of Markdown files into WordPress via the toolkit. + - markdown | Markdown | Use Markdown as a source or destination format. + - blockparser | BlockParser | Analyze serialized blocks inside post content. + - httpclient | HttpClient | Download media and remote source data while importing. --- Streaming WordPress import/export. WXR, SQL, block markup — without loading whole datasets into memory. diff --git a/components/Encoding/README.md b/components/Encoding/README.md index e963c6d83..d590178f8 100644 --- a/components/Encoding/README.md +++ b/components/Encoding/README.md @@ -3,9 +3,10 @@ slug: encoding title: Encoding install: wp-php-toolkit/encoding -see_also: html | HTML | Normalize incoming text before HTML tokenization. -see_also: xml | XML | Keep invalid bytes out of XML streams. -see_also: dataliberation | DataLiberation | Clean content before importing it into WordPress. +see_also: + - html | HTML | Normalize incoming text before HTML tokenization. + - xml | XML | Keep invalid bytes out of XML streams. + - dataliberation | DataLiberation | Clean content before importing it into WordPress. --- UTF-8 validation and scrubbing with a pure-PHP fallback when mbstring is unavailable. Detects malformed bytes and replaces them per the Unicode maximal-subpart algorithm. diff --git a/components/Filesystem/README.md b/components/Filesystem/README.md index 492dfee29..00d8c6327 100644 --- a/components/Filesystem/README.md +++ b/components/Filesystem/README.md @@ -3,9 +3,10 @@ slug: filesystem title: Filesystem install: wp-php-toolkit/filesystem -see_also: bytestream | ByteStream | Open files as readers and writers instead of loading full strings. -see_also: zip | Zip | Mount archives and copy data between archive-backed and normal filesystems. -see_also: git | Git | Expose repository trees through a filesystem-shaped API. +see_also: + - bytestream | ByteStream | Open files as readers and writers instead of loading full strings. + - zip | Zip | Mount archives and copy data between archive-backed and normal filesystems. + - git | Git | Expose repository trees through a filesystem-shaped API. --- One Filesystem interface across local disk, in-memory trees, SQLite databases, and ZIP archives. Forward-slash paths everywhere — even on Windows — so the same code runs in tests, in production, and inside read-only ZIPs. diff --git a/components/Git/README.md b/components/Git/README.md index 58076fb53..f31090d81 100644 --- a/components/Git/README.md +++ b/components/Git/README.md @@ -3,9 +3,10 @@ slug: git title: Git install: wp-php-toolkit/git -see_also: filesystem | Filesystem | Work with repository trees through a storage abstraction. -see_also: merge | Merge | Resolve divergent histories with explicit three-way merge logic. -see_also: bytestream | ByteStream | Read and write object data without accidental buffering. +see_also: + - filesystem | Filesystem | Work with repository trees through a storage abstraction. + - merge | Merge | Resolve divergent histories with explicit three-way merge logic. + - bytestream | ByteStream | Read and write object data without accidental buffering. --- A pure-PHP Git client and server. Commits, branches, diffs, HTTP push/pull — all without shelling out to git. diff --git a/components/HTML/README.md b/components/HTML/README.md index b2aa2c50f..f8389acd0 100644 --- a/components/HTML/README.md +++ b/components/HTML/README.md @@ -7,10 +7,11 @@ credit_title: Ported from WordPress core credit_body: | The HTML component is a port of WordPress core's WP_HTML_Tag_Processor and WP_HTML_Processor. Source: WordPress/wordpress-develop. Bug fixes flow in both directions. -see_also: ../learn/01-rewriting-html.html | Tutorial — Rewriting HTML safely | The chapter that introduces the cursor model and the clean_post_html() function reused later in the importer. -see_also: blockparser | BlockParser | Parse block comments first, then rewrite the HTML inside each block. -see_also: markdown | Markdown | Convert Markdown to blocks before polishing generated HTML. -see_also: dataliberation | DataLiberation | Rewrite URLs and media references during import/export pipelines. +see_also: + - ../learn/01-rewriting-html.html | Tutorial — Rewriting HTML safely | The chapter that introduces the cursor model and the clean_post_html() function reused later in the importer. + - blockparser | BlockParser | Parse block comments first, then rewrite the HTML inside each block. + - markdown | Markdown | Convert Markdown to blocks before polishing generated HTML. + - dataliberation | DataLiberation | Rewrite URLs and media references during import/export pipelines. --- A pure-PHP HTML5 parser and tag rewriter mirroring WordPress core's HTML API. Treat HTML the way browsers do — without libxml2, DOMDocument, or regex hacks — and rewrite attributes in a single linear pass. diff --git a/components/HttpClient/README.md b/components/HttpClient/README.md index b62f1d1f2..240359342 100644 --- a/components/HttpClient/README.md +++ b/components/HttpClient/README.md @@ -3,10 +3,11 @@ slug: httpclient title: HttpClient install: wp-php-toolkit/http-client -see_also: ../learn/04-talking-to-the-network.html | Tutorial — Talking to the network | Walks through a streaming downloader that resumes, fans out, and pipes bytes to disk without buffering. -see_also: bytestream | ByteStream | Stream request and response bodies. -see_also: filesystem | Filesystem | Persist large downloads without buffering them in memory. -see_also: corsproxy | CORSProxy | Bridge browser-side tools to servers without CORS headers. +see_also: + - ../learn/04-talking-to-the-network.html | Tutorial — Talking to the network | Walks through a streaming downloader that resumes, fans out, and pipes bytes to disk without buffering. + - bytestream | ByteStream | Stream request and response bodies. + - filesystem | Filesystem | Persist large downloads without buffering them in memory. + - corsproxy | CORSProxy | Bridge browser-side tools to servers without CORS headers. --- Async HTTP client without curl required. Uses sockets when curl is missing, supports concurrent requests and streaming responses. diff --git a/components/HttpServer/README.md b/components/HttpServer/README.md index 2967dfd6a..100dfc54b 100644 --- a/components/HttpServer/README.md +++ b/components/HttpServer/README.md @@ -3,8 +3,9 @@ slug: httpserver title: HttpServer install: wp-php-toolkit/http-server -see_also: cli | CLI | Expose a local browser UI from a command-line tool. -see_also: httpclient | HttpClient | Test client code against a small local fixture server. +see_also: + - cli | CLI | Expose a local browser UI from a command-line tool. + - httpclient | HttpClient | Test client code against a small local fixture server. --- A minimal blocking TCP HTTP server in pure PHP. For CLI tools and tests, not for production traffic. diff --git a/components/Markdown/README.md b/components/Markdown/README.md index 37f09baea..7db1fc402 100644 --- a/components/Markdown/README.md +++ b/components/Markdown/README.md @@ -7,9 +7,10 @@ credit_title: Built on league/commonmark credit_body: | Markdown parsing is delegated to league/commonmark; YAML frontmatter is handled by webuni/front-matter. The toolkit's own work is the bridge between CommonMark's AST and WordPress block markup, in both directions. -see_also: blockparser | BlockParser | Understand the block tree created from Markdown output. -see_also: html | HTML | Rewrite rendered HTML fragments without using DOMDocument. -see_also: dataliberation | DataLiberation | Turn Markdown folders into import/export streams. +see_also: + - blockparser | BlockParser | Understand the block tree created from Markdown output. + - html | HTML | Rewrite rendered HTML fragments without using DOMDocument. + - dataliberation | DataLiberation | Turn Markdown folders into import/export streams. --- Bidirectional converter between Markdown and WordPress block markup. Useful for moving content between Markdown files and WordPress while preserving the structures both formats can express. diff --git a/components/Merge/README.md b/components/Merge/README.md index a6a582f0e..ebab055e7 100644 --- a/components/Merge/README.md +++ b/components/Merge/README.md @@ -3,9 +3,10 @@ slug: merge title: Merge install: wp-php-toolkit/merge -see_also: git | Git | Merge file contents discovered through repository history. -see_also: markdown | Markdown | Resolve file-based editorial workflows before converting to blocks. -see_also: dataliberation | DataLiberation | Make content synchronization conflicts visible. +see_also: + - git | Git | Merge file contents discovered through repository history. + - markdown | Markdown | Resolve file-based editorial workflows before converting to blocks. + - dataliberation | DataLiberation | Make content synchronization conflicts visible. --- Three-way merge and diff. Pluggable differ + merger + optional validator. diff --git a/components/Polyfill/README.md b/components/Polyfill/README.md index ede94bb91..f72d5d86c 100644 --- a/components/Polyfill/README.md +++ b/components/Polyfill/README.md @@ -7,8 +7,9 @@ credit_title: WordPress-shaped behavior credit_body: | When WordPress is loaded, every function in this component defers to WordPress. The standalone implementations of esc_html(), add_filter(), __(), and friends match WordPress core's behavior so the same code runs inside and outside the platform. -see_also: html | HTML | Run WordPress-shaped escaping and translation helpers beside HTML processors. -see_also: blockparser | BlockParser | Keep standalone block tooling familiar outside WordPress. +see_also: + - html | HTML | Run WordPress-shaped escaping and translation helpers beside HTML processors. + - blockparser | BlockParser | Keep standalone block tooling familiar outside WordPress. --- PHP 8 string functions on PHP 7.2+, WordPress hook stubs, and translation/escaping passthroughs so toolkit code runs without WordPress. diff --git a/components/ToolkitCodingStandards/README.md b/components/ToolkitCodingStandards/README.md index d7b9ffe6d..cb7f21b5c 100644 --- a/components/ToolkitCodingStandards/README.md +++ b/components/ToolkitCodingStandards/README.md @@ -3,7 +3,8 @@ slug: coding-standards title: ToolkitCodingStandards install: wp-php-toolkit/toolkit-coding-standards -see_also: polyfill | Polyfill | Share WordPress-style compatibility expectations across standalone packages. +see_also: + - polyfill | Polyfill | Share WordPress-style compatibility expectations across standalone packages. --- PHP_CodeSniffer sniffs used by this project: enforce Yoda comparisons and ban the short ternary where it hides falsy-value bugs. diff --git a/components/XML/README.md b/components/XML/README.md index a4cf4c620..efdf30e44 100644 --- a/components/XML/README.md +++ b/components/XML/README.md @@ -3,9 +3,10 @@ slug: xml title: XML install: wp-php-toolkit/xml -see_also: dataliberation | DataLiberation | Read and write WXR-sized WordPress exports as entities. -see_also: encoding | Encoding | Validate and scrub text before strict XML processing. -see_also: bytestream | ByteStream | Keep large XML reads incremental. +see_also: + - dataliberation | DataLiberation | Read and write WXR-sized WordPress exports as entities. + - encoding | Encoding | Validate and scrub text before strict XML processing. + - bytestream | ByteStream | Keep large XML reads incremental. --- A streaming, namespace-aware XML processor in pure PHP. Read and modify huge feeds, WXR exports, ePub manifests, and Office Open XML parts without ever loading the document into memory and without depending on libxml2. diff --git a/components/Zip/README.md b/components/Zip/README.md index 03e7e6a99..4b08be30a 100644 --- a/components/Zip/README.md +++ b/components/Zip/README.md @@ -3,10 +3,11 @@ slug: zip title: Zip install: wp-php-toolkit/zip -see_also: ../learn/02-streaming-archives.html | Tutorial — Streaming archives | Walk through ZIP and EPUB writers from the toolkit's worked example. -see_also: filesystem | Filesystem | Treat an archive like a swappable filesystem backend. -see_also: bytestream | ByteStream | Feed readers and writers without whole-file buffers. -see_also: httpclient | HttpClient | Stream downloaded archives into validation or extraction workflows. +see_also: + - ../learn/02-streaming-archives.html | Tutorial — Streaming archives | Walk through ZIP and EPUB writers from the toolkit's worked example. + - filesystem | Filesystem | Treat an archive like a swappable filesystem backend. + - bytestream | ByteStream | Feed readers and writers without whole-file buffers. + - httpclient | HttpClient | Stream downloaded archives into validation or extraction workflows. --- Read and write ZIP archives in pure PHP — no libzip, no ZipArchive. Streams entries one at a time, so you can build EPUBs, .docx files, and multi-gigabyte plugin bundles without buffering the archive in memory. diff --git a/phpcs.xml b/phpcs.xml index 02ed3994a..e36b11148 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -10,6 +10,9 @@ /plugins/git-repo/ /plugins/url-updater/ /bin/build-phar + /bin/build-reference.php + /bin/run-snippets.php + /bin/serve-docs.php /examples/ /docs/ rector.php