diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 000000000..2fb740ede
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,65 @@
+name: Deploy docs to GitHub Pages
+
+on:
+  push:
+    branches: [trunk]
+    paths:
+      - 'components/**'
+      - 'docs/**'
+      - 'bin/build-docs*'
+      - 'composer.json'
+      - 'composer.lock'
+      - '.github/workflows/docs.yml'
+  workflow_dispatch:
+
+# Read-only by default; the deploy job below opts in to the scopes Pages needs.
+permissions:
+  contents: read
+
+# Allow one deployment at a time. Do not cancel a run that is already in
+# progress: let it finish and only skip superseded runs still waiting in the queue.
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up PHP
+        uses: shivammathur/setup-php@v2
+        with:
+          php-version: '8.1'
+          tools: composer
+          coverage: none
+
+      - name: Install dependencies
+        run: composer install --no-dev --optimize-autoloader --no-progress
+
+      - name: Bundle toolkit and regenerate docs
+        run: |
+          mkdir -p docs/assets
+          rm -f docs/assets/php-toolkit.zip
+          zip -qr docs/assets/php-toolkit.zip components vendor bootstrap.php composer.json \
+            -x "*/Tests/*" "*/tests/*" "*/.git/*" "*/.github/*" "*/node_modules/*"
+          python3 bin/build-docs.py
+
+      - uses: actions/upload-pages-artifact@v3
+        with:
+          path: ./docs
+
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    # Only this job calls the Pages deployment API, so only it gets write scopes.
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/snippet-tests.yml b/.github/workflows/snippet-tests.yml
new file mode 100644
index 000000000..7b8ab1f24
--- /dev/null
+++ b/.github/workflows/snippet-tests.yml
@@ -0,0 +1,47 @@
+name: Verify docs snippets
+
+# Runs every PHP snippet in bin/_docs_components.py against the local toolkit
+# and compares stdout against bin/_expected_outputs.json. Anything that drifts
+# fails CI; anything that errors out also fails CI.
+# +# Snippets that can't run locally (need network, a listening port, or other +# Playground-only features) are not in the JSON and are skipped — the runner +# only enforces matches for snippets that already have captured output. + +on: + pull_request: + paths: + - 'components/**' + - 'bin/_docs_components.py' + - 'bin/_expected_outputs.json' + - 'bin/run-snippets.py' + - 'composer.json' + - 'composer.lock' + - '.github/workflows/snippet-tests.yml' + push: + branches: [trunk] + workflow_dispatch: + +jobs: + run-snippets: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: '8.3' + tools: composer + coverage: none + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install toolkit dependencies + run: composer install --no-dev --optimize-autoloader --no-progress + + - name: Run docs snippets and compare to expected output + run: bin/run-snippets.py --check diff --git a/bin/_docs_components.py b/bin/_docs_components.py new file mode 100644 index 000000000..29aed0ef1 --- /dev/null +++ b/bin/_docs_components.py @@ -0,0 +1,2871 @@ +# Component catalog for the runnable docs site. Imported by bin/build-docs.py. +# +# Format: list of (slug, title, lede_html, install_package, sections), where sections is a list of +# (heading, body_html, snippet_or_None) +# and snippet is (filename, php_code). Use (filename, php_code, False) for a +# non-runnable example. +# +# Both body_html and php_code may use HTML entities (< > & " ') +# — the renderer in build-docs.py decodes them before output. That keeps the +# embedded snippets readable when this file is edited as Python. + +LOAD = "require '/wordpress/wp-content/php-toolkit/vendor/autoload.php';\n\n" + + +def php(snippet): + return 'WP_HTML_Tag_Processor and WP_HTML_Processor. ' + 'Source: WordPress/wordpress-develop. 
' + 'Bug fixes flow in both directions.', + ), + 'blockparser': ( + 'WordPress core, packaged standalone', + 'WP_Block_Parser is WordPress core\'s block parser, ' + 'packaged here so importers and linters can read ' + 'block markup ' + 'without booting WordPress. Source: ' + 'WordPress/wordpress-develop.', + ), + 'markdown': ( + 'Built on league/commonmark', + 'Markdown parsing is delegated to ' + 'league/commonmark; ' + 'YAML frontmatter is handled by ' + 'webuni/front-matter. ' + 'The toolkit\'s own work is the bridge between CommonMark\'s AST and ' + 'WordPress block markup, in both directions.', + ), + 'polyfill': ( + 'WordPress-shaped behavior', + 'When WordPress is loaded, every function in this component defers to WordPress. ' + 'The standalone implementations of esc_html(), add_filter(), ' + '__(), and friends match WordPress core\'s behavior so the same code runs ' + 'inside and outside the platform.', + ), +} + + +COMPONENT_GUIDES = { + 'html': { + 'mental_model': + '

Start with the tag processor when you need to change markup that WordPress already stored: add loading="lazy" to post images, make feed links absolute, or remove inline event handlers from pasted HTML. It scans forward and preserves every byte it does not touch.

' + '

Switch to the full processor when the browser tree matters. Use it to find images inside figures, walk heading depth, or return to a saved parent after inspecting child tags.

', + 'journey': ( + ('Rewrite one tag safely', 'Add image attributes without parsing a DOM or changing surrounding whitespace.'), + ('Protect real content', 'Rewrite relative links, remove script behavior, and add CSP nonces without clobbering author-provided attributes.'), + ('Use structure when you need it', 'Find figure images, print a heading outline, and use bookmarks to annotate a parent after scanning its children.'), + ), + }, + 'zip': { + 'mental_model': + '

Treat a ZIP as a small filesystem with a table of contents at the end. Read the central directory, open one entry stream, and copy that entry where it belongs.

' + '

Use ZipFilesystem when your code wants get_contents() and ls(). Use ZipEncoder and ZipDecoder when the archive format matters, such as an EPUB that must store mimetype first and uncompressed.

', + 'journey': ( + ('Open an archive as files', 'Read readme.txt through ZipFilesystem before touching entry headers.'), + ('Write a format with rules', 'Build an EPUB and make the first entry Stored, not Deflated.'), + ('Move archives through streams', 'Repack entries, reject ../ paths, and copy a remote ZIP entry into another filesystem without a manual byte loop.'), + ), + }, + 'bytestream': { + 'mental_model': + '

A read stream separates three actions: pull bytes, inspect the buffer, then consume the bytes you accepted. That pattern lets a parser wait for a full line, a ZIP decoder wait for a complete header, or an HTTP client report progress without losing data.

' + '

Write streams make the destination boring. The caller writes chunks; the sink decides whether those bytes go to memory, a file, a compressor, or another component.

', + 'journey': ( + ('Read in chunks', 'Pull from memory and files with the same loop, then stop only when the stream reaches end-of-data.'), + ('Handle awkward boundaries', 'Read lines split across chunks and connect producers to consumers with MemoryPipe.'), + ('Add behavior around bytes', 'Wrap streams with gzip, hashing, limits, and windows while keeping the caller on the same interface.'), + ), + }, + 'filesystem': { + 'mental_model': + '

Write your tool against a filesystem object, not against the host machine. Tests can pass an in-memory tree, a CLI command can pass a local directory, and an importer can pass a ZIP-backed filesystem.

' + '

Every toolkit path uses forward slashes. A path such as wp-content/uploads/2026/logo.png means the same thing on macOS, Windows, Playground, and inside an archive.

', + 'journey': ( + ('Start in memory', 'Write and list files without touching disk, which makes examples and tests deterministic.'), + ('Move to a real backend', 'Use local, SQLite, and atomic-write examples to keep the same calling style while changing storage.'), + ('Copy between backends', 'Move a generated theme file from memory to disk, or from a ZIP archive into a local staging directory, through one helper.'), + ), + }, + 'blockparser': { + 'mental_model': + '

The parser turns serialized post content into the block array shape WordPress core returns. It does not render blocks, load block.json, or ask a registry whether a block exists.

' + '

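Handle blockName === null first. A real post can contain a paragraph block, a custom block, and loose HTML before or after both; the parser returns that loose HTML as a block whose blockName is null, so code that assumes every block has a string name will trip on it.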

', + 'journey': ( + ('Inspect the returned shape', 'Parse one paragraph block and read blockName, attrs, innerBlocks, innerHTML, and innerContent.'), + ('Walk the tree', 'Count nested blocks and find custom blocks without writing recursive boilerplate everywhere.'), + ('Ask editorial questions', 'Detect skipped heading levels, stale embeds, and blocks that need a migration before import.'), + ), + }, + 'markdown': { + 'mental_model': + '

Use Markdown for files that humans edit and block markup for content that WordPress stores. This component translates the supported middle ground: headings, paragraphs, lists, code blocks, links, images, and frontmatter-backed metadata.

' + '

Keep unsupported syntax visible. A migration tool should tell you that a file contains an unsupported table instead of silently dropping it before publishing.

', + 'journey': ( + ('Convert one document', 'Turn posts/launch.md into block markup and turn supported blocks back into readable Markdown.'), + ('Carry metadata beside content', 'Read frontmatter for title, slug, date, tags, and import hints.'), + ('Prepare a folder import', 'Map filenames to slugs, audit generated blocks, and hand the result to DataLiberation when you need WXR.'), + ), + }, + 'xml': { + 'mental_model': + '

XMLProcessor walks XML as a cursor. It reads the next tag, exposes attributes and text, records edits, and emits updated XML only when you call get_updated_xml().

' + '

Query namespaces by URI, not by prefix. The prefix is only a local alias chosen by the file that declares it: in WXR, match http://wordpress.org/export/1.2/ whether the source file binds it to wp: or to any other prefix.

', + 'journey': ( + ('Edit one attribute', 'Bump product prices and see how buffered updates keep untouched XML intact.'), + ('Read namespaced exports', 'Find WXR status fields and attachment URLs by namespace URI and local name.'), + ('Process export-sized files', 'Rewrite staging URLs and parse OPML without building a full in-memory tree.'), + ), + }, + 'encoding': { + 'mental_model': + '

Validate text with the Encoding helpers before a stricter parser sees unfamiliar bytes. A Latin-1 title from an old export, an overlong UTF-8 sequence in an upload, or a Unicode noncharacter can break XML, JSON, or a database write later in the pipeline — and the further downstream the failure happens, the harder it is to trace.

' + '

The component gives the same answer whether PHP has mbstring available or falls back to the pure-PHP scanner.

', + 'journey': ( + ('Reject invalid bytes', 'Separate clean UTF-8 from Latin-1 bytes, overlong forms, surrogate halves, and incomplete sequences.'), + ('Repair when content matters', 'Replace invalid bytes with U+FFFD when keeping the rest of a post title beats stopping the import.'), + ('Check downstream limits', 'Detect noncharacters before writing XML or handing text to a system with stricter Unicode rules.'), + ), + }, + 'dataliberation': { + 'mental_model': + '

Model a migration as a stream of WordPress-shaped entities. Read a post, rewrite its content and metadata, write it out, then move to the next entity.

' + '

The useful work happens between readers and writers: rewrite https://staging.example.test inside HTML, block attributes, CSS, GUIDs, and media URLs; download attachments; and keep enough state to resume after a failed request.

', + 'journey': ( + ('Write one entity', 'Create a WXR post record and read it back before building a site-sized pipeline.'), + ('Transform as you stream', 'Rewrite URLs on each entity without loading the whole export.'), + ('Compose a migration', 'Convert a Markdown folder, frontload media with HttpClient, and write WXR through XML and ByteStream layers.'), + ), + }, + 'git': { + 'mental_model': + '

Git stores snapshots as objects: blobs hold file bytes, trees hold directory listings, commits point at trees, and refs name commits.

' + '

This component keeps those objects visible. A browser-based editor can commit generated files, move refs/heads/main, expose a commit tree as a filesystem, and merge another branch without running the git binary.

', + 'journey': ( + ('Create a snapshot', 'Commit files into an in-memory repository and print the resulting object ID.'), + ('Read history by name', 'Resolve refs, walk parent commits, and mount a commit tree with GitFilesystem.'), + ('Coordinate edits', 'Create branches, merge content, and keep conflicts explicit for the caller.'), + ), + }, + 'merge': { + 'mental_model': + '

A three-way merge needs the common base, your version, and their version. The base tells the merger whether two lines changed independently or collided.

' + '

Start with line merges for Markdown, config files, and generated PHP. Move to a domain-specific differ only when lines hide the real unit of change.

', + 'journey': ( + ('See the edit', 'Generate a diff and patch so the merge inputs feel concrete.'), + ('Auto-merge independent lines', 'Combine two edits that touch different parts of the same file.'), + ('Surface conflicts', 'Return conflict records for a UI, CLI prompt, or sync log instead of guessing a winner.'), + ), + }, + 'httpclient': { + 'mental_model': + '

Make the first request boring: GET https://api.wordpress.org/plugins/info/1.2/, then read the response status and body. From there, add the details the workflow actually needs: a POST body, JSON headers, redirects, cache policy, or a chosen transport.

' + '

When the response becomes a file, keep it as a stream. A plugin installer can show progress while downloading a ZIP, resume a partial archive with Range, and hand the remote body to ZipFilesystem without first building a giant string.

', + 'journey': ( + ('Start with GET and POST', 'Fetch a URL, submit form data, and build a JSON request before touching lower-level objects.'), + ('Configure the request path', 'Choose a transport, follow redirects, cache responses, and report failures with useful context.'), + ('Scale the transfer', 'Show progress, keep ten media downloads active, resume a partial ZIP, and stream-unzip a remote archive through Filesystem helpers.'), + ), + }, + 'httpserver': { + 'mental_model': + '

Use HttpServer when a PHP tool needs one local endpoint. A CLI command can open http://127.0.0.1:8765/callback for an OAuth flow, serve fixture JSON to HttpClient tests, or expose a tiny status page during an import.

' + '

The server accepts a connection, parses one request, and gives your handler a response writer. Keep the process lifetime and shutdown rule in your command.

', + 'journey': ( + ('Serve one response', 'Bind to loopback and return text from a handler.'), + ('Route a small local API', 'Branch on method and path for /api/status and /api/echo.'), + ('Buffer when headers depend on the body', 'Use the buffered writer when the runtime needs the full response before sending headers.'), + ), + }, + 'corsproxy': { + 'mental_model': + '

A browser app cannot read https://api.github.com/repos/WordPress/php-toolkit unless GitHub sends CORS headers the app can use. A PHP proxy can fetch that URL server-side and return a controlled browser-readable response.

' + '

Deploy the proxy as a gate, not as an open tunnel. Allow api.github.com and raw.githubusercontent.com for a docs tool; reject private IP ranges, unknown hosts, oversized responses, and credential-bearing request headers.

', + 'journey': ( + ('See the proxy URL shape', 'Request /cors-proxy.php/https://api.github.com/repos/WordPress/php-toolkit from a local PHP server.'), + ('Lock down deployment', 'Add a rate limiter and a host allowlist before exposing the proxy.'), + ('Use it from the browser', 'Wrap fetch() once, then deploy the PHP script behind nginx or another SAPI.'), + ), + }, + 'cli': { + 'mental_model': + '

Define the command-line contract once, then parse argv against it. The parser returns positional arguments and named options; your application validates them and runs the command.

' + '

A command such as toolkit import posts/launch.md --site=demo --dry-run -vv should not need a console framework just to understand flags, values, and positionals.

', + 'journey': ( + ('Parse the smallest command', 'Read one boolean flag and one positional argument.'), + ('Accept normal shell shapes', 'Handle --port=8080, --port 8080, -p 8080, and bundled booleans such as -afv.'), + ('Build command behavior', 'Add required options, help output, and subcommand dispatch in application code.'), + ), + }, + 'polyfill': { + 'mental_model': + '

Load Polyfill when toolkit code runs outside WordPress but still calls WordPress-shaped helpers. Standalone tests can call esc_html(), add a filter, or use a translation stub without booting WordPress.

' + '

The component defines only missing functions. If WordPress or the current PHP runtime already provides a function, the polyfill leaves it alone.

', + 'journey': ( + ('Backfill missing PHP helpers', 'Use PHP 7.2-compatible helpers without dropping support for older runtimes.'), + ('Keep familiar WordPress calls', 'Escape output and keep translation-shaped call sites in standalone tools.'), + ('Expose extension points', 'Register filters and actions for library code that needs hooks outside WordPress.'), + ), + }, + 'blueprints': { + 'mental_model': + '

A Blueprint is a versioned recipe for a WordPress site. It can install Gutenberg, set permalink structure, import content, copy files, and run WP-CLI steps in a predictable order.

' + '

The runner supplies the environment: site root, site URL, execution mode, and filesystem access. The validator checks user-authored JSON before the runner mutates the target site.

', + 'journey': ( + ('Configure the target', 'Create a RunnerConfiguration with the site path, URL, and execution mode.'), + ('Generate repeatable recipes', 'Build JSON from PHP when tests or docs need a fresh site with the same plugins and options.'), + ('Validate before running', 'Catch misspelled step names and missing fields before installing packages or changing options.'), + ), + }, + 'coding-standards': { + 'mental_model': + '

Turn repeat review comments into PHPCS sniffs. If the project always rejects short ternaries, loose comparisons, or a confusing Yoda condition, the tool should report it before a reviewer does.

' + '

Keep each sniff narrow. A useful sniff names the risky pattern and shows the replacement code shape contributors should write.

', + 'journey': ( + ('Enable the ruleset', 'Point PHPCS at the toolkit standard from a component or CI job.'), + ('Read the rule as review guidance', 'Learn why the Yoda and short-ternary sniffs exist instead of treating them as arbitrary style.'), + ('Write the explicit form', 'Replace compact syntax with code that stays clear on PHP 7.2 and across WordPress-style projects.'), + ), + }, +} + + +STARTER_PATHS = ( + ( + 'Content and migration', + 'Start here when you are importing, exporting, rewriting, or auditing WordPress content.', + ('html', 'blockparser', 'markdown', 'xml', 'dataliberation'), + ), + ( + 'Streams and storage', + 'Use this path for archives, large files, testable storage backends, and pure-PHP file movement.', + ('bytestream', 'filesystem', 'zip', 'git', 'merge'), + ), + ( + 'Networked tools', + 'Use this path for clients, local fixture servers, browser-facing proxies, and CLI workflows.', + ('httpclient', 'httpserver', 'corsproxy', 'cli'), + ), + ( + 'WordPress runtime support', + 'Use this path when your code needs WordPress-shaped helpers, repeatable sites, or project-specific review rules.', + ('polyfill', 'blueprints', 'coding-standards'), + ), +) + + +COMPONENT_RELATIONS = { + 'html': ( + ('blockparser', 'BlockParser', 'Parse block comments first, then rewrite the HTML inside each block.'), + ('markdown', 'Markdown', 'Convert Markdown to blocks before polishing generated HTML.'), + ('dataliberation', 'DataLiberation', 'Rewrite URLs and media references during import/export pipelines.'), + ), + 'zip': ( + ('filesystem', 'Filesystem', 'Treat an archive like a swappable filesystem backend.'), + ('bytestream', 'ByteStream', 'Feed readers and writers without whole-file buffers.'), + ('httpclient', 'HttpClient', 'Stream downloaded archives into validation or extraction workflows.'), + ), + 'bytestream': ( + ('filesystem', 'Filesystem', 'Back file reads and writes with the same stream primitives.'), + ('zip', 'Zip', 'Read and write archive 
entries one stream at a time.'), + ('httpclient', 'HttpClient', 'Process request and response bodies incrementally.'), + ), + 'filesystem': ( + ('bytestream', 'ByteStream', 'Open files as readers and writers instead of loading full strings.'), + ('zip', 'Zip', 'Mount archives and copy data between archive-backed and normal filesystems.'), + ('git', 'Git', 'Expose repository trees through a filesystem-shaped API.'), + ), + 'blockparser': ( + ('html', 'HTML', 'Inspect or rewrite the HTML carried by parsed blocks.'), + ('markdown', 'Markdown', 'Move between author-friendly Markdown and serialized block markup.'), + ('dataliberation', 'DataLiberation', 'Audit and transform blocks while migrating content.'), + ), + 'markdown': ( + ('blockparser', 'BlockParser', 'Understand the block tree created from Markdown output.'), + ('html', 'HTML', 'Rewrite rendered HTML fragments without using DOMDocument.'), + ('dataliberation', 'DataLiberation', 'Turn Markdown folders into import/export streams.'), + ), + 'xml': ( + ('dataliberation', 'DataLiberation', 'Read and write WXR-sized WordPress exports as entities.'), + ('encoding', 'Encoding', 'Validate and scrub text before strict XML processing.'), + ('bytestream', 'ByteStream', 'Keep large XML reads incremental.'), + ), + 'encoding': ( + ('html', 'HTML', 'Normalize incoming text before HTML tokenization.'), + ('xml', 'XML', 'Keep invalid bytes out of XML streams.'), + ('dataliberation', 'DataLiberation', 'Clean content before importing it into WordPress.'), + ), + 'dataliberation': ( + ('markdown', 'Markdown', 'Use Markdown as a source or destination format.'), + ('blockparser', 'BlockParser', 'Analyze serialized blocks inside post content.'), + ('httpclient', 'HttpClient', 'Download media and remote source data while importing.'), + ), + 'git': ( + ('filesystem', 'Filesystem', 'Work with repository trees through a storage abstraction.'), + ('merge', 'Merge', 'Resolve divergent histories with explicit three-way merge logic.'), + 
('bytestream', 'ByteStream', 'Read and write object data without accidental buffering.'), + ), + 'merge': ( + ('git', 'Git', 'Merge file contents discovered through repository history.'), + ('markdown', 'Markdown', 'Resolve file-based editorial workflows before converting to blocks.'), + ('dataliberation', 'DataLiberation', 'Make content synchronization conflicts visible.'), + ), + 'httpclient': ( + ('bytestream', 'ByteStream', 'Stream request and response bodies.'), + ('filesystem', 'Filesystem', 'Persist large downloads without buffering them in memory.'), + ('corsproxy', 'CORSProxy', 'Bridge browser-side tools to servers without CORS headers.'), + ), + 'httpserver': ( + ('cli', 'CLI', 'Expose a local browser UI from a command-line tool.'), + ('httpclient', 'HttpClient', 'Test client code against a small local fixture server.'), + ), + 'corsproxy': ( + ('httpclient', 'HttpClient', 'Fetch upstream responses from PHP when browser CORS blocks direct access.'), + ('httpserver', 'HttpServer', 'Understand the local-server shape before deploying a proxy endpoint.'), + ), + 'cli': ( + ('filesystem', 'Filesystem', 'Keep command behavior testable with in-memory storage.'), + ('blueprints', 'Blueprints', 'Build repeatable site setup commands around parsed options.'), + ('httpserver', 'HttpServer', 'Add a local web UI to a CLI workflow.'), + ), + 'polyfill': ( + ('html', 'HTML', 'Run WordPress-shaped escaping and translation helpers beside HTML processors.'), + ('blockparser', 'BlockParser', 'Keep standalone block tooling familiar outside WordPress.'), + ), + 'blueprints': ( + ('filesystem', 'Filesystem', 'Prepare files and fixtures before applying site setup steps.'), + ('httpclient', 'HttpClient', 'Download packages or source data as part of provisioning workflows.'), + ('cli', 'CLI', 'Wrap repeatable blueprint operations in a small command.'), + ), + 'coding-standards': ( + ('polyfill', 'Polyfill', 'Share WordPress-style compatibility expectations across standalone 
packages.'), + ), +} + + +# =========================================================================== +# HTML +# =========================================================================== +COMPONENTS.append(('html', 'HTML', + 'A pure-PHP HTML5 parser and tag rewriter mirroring WordPress core\'s HTML API. Treat HTML the way browsers do — without libxml2, DOMDocument, or regex hacks — and rewrite attributes in a single linear pass.', + 'wp-php-toolkit/html', + [ + ('Why this exists', + '

WordPress runs HTML fragments through filters every time a request renders: post content, block markup, comments, excerpts, widgets, feeds, imported documents. Those fragments can omit <html> and <body>, close tags implicitly, or mix browser-correct markup with author mistakes that DOMDocument and regular expressions do not model well.

' + '

The HTML component gives WordPress-style code the same parsing model WordPress core uses: a browser-compatible tokenizer and tree-aware processor that run in pure PHP. Choose it for exact-byte rewrites, imperfect fragments, and post-content filters where a full DOM would do too much work.

' + '

The component gives you two processors. WP_HTML_Tag_Processor is a forward-only cursor over tags and tokens — useful for attribute rewriting at scale. WP_HTML_Processor layers HTML5 tree construction on top so you can query by ancestry (breadcrumbs), serialize the parsed document, and trust that <p>one<p>two parses as two paragraphs the way a browser sees it.

' + '

Footgun: mutations are buffered. Nothing changes in the source string until you call get_updated_html(). If you read get_attribute() after a set_attribute() on the same tag, you see the new value — but downstream tooling reading the original string sees stale HTML until you serialize.

', + None), + ('Add loading="lazy" to every image', + '

The "hello world" of tag rewriting. One linear pass, no DOM, no reserialization cost beyond the bytes you actually changed.

' + '

Try this: click Run, then change \'lazy\' to \'eager\' on the first image only by guarding it with $tags->get_attribute( \'src\' ) === \'hero.jpg\'. Run again and notice that get_updated_html() only rewrites the bytes for that one tag.

', + ('lazy-load-images.php', php('''$html = '
+\tHero +\t

Intro copy.

+\tInline +
'; + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag( 'img' ) ) { +\t// Don't clobber an explicit eager hint the author already set. +\tif ( null === $tags->get_attribute( 'loading' ) ) { +\t\t$tags->set_attribute( 'loading', 'lazy' ); +\t} +\t$tags->set_attribute( 'decoding', 'async' ); +} + +echo $tags->get_updated_html();'''))), + ('Rewrite relative links to absolute URLs', + '

Use this before sending post content to an RSS feed, an email template, or a CDN-backed copy of a site. The processor rewrites only the changed bytes, so untouched markup stays byte-identical.

', + ('absolute-links.php', php('''$html = '

See about, x, ' +\t. 'and contact.

'; + +$base = 'https://my-site.test/'; + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag( 'a' ) ) { +\t$href = $tags->get_attribute( 'href' ); +\tif ( null === $href || '' === $href ) { +\t\tcontinue; +\t} +\tif ( preg_match( '#^[a-z][a-z0-9+.-]*:#i', $href ) || 0 === strpos( $href, '//' ) || 0 === strpos( $href, '#' ) ) { +\t\tcontinue; +\t} +\t$tags->set_attribute( 'href', rtrim( $base, '/' ) . '/' . ltrim( $href, '/' ) ); +} + +echo $tags->get_updated_html();'''))), + ('Strip every script and inline event handler', + '

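A common sanitization step: neutralize untrusted HTML before display. Blank a script\'s body with set_modifiable_text() and strip every on* attribute via get_attribute_names_with_prefix(). This pass leaves the emptied script element and its other attributes in place and does not inspect URL attributes, so an external src or a javascript: link still needs its own check before the markup is safe to show.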

', + ('sanitize-html.php', php('''$untrusted = '

Hi friend!

' +\t. '' +\t. ''; + +$tags = new WP_HTML_Tag_Processor( $untrusted ); +while ( $tags->next_tag() ) { +\tif ( 'SCRIPT' === $tags->get_tag() && ! $tags->is_tag_closer() ) { +\t\t$tags->set_modifiable_text( '' ); +\t} +\t$on_handlers = $tags->get_attribute_names_with_prefix( 'on' ); +\tif ( $on_handlers ) { +\t\tforeach ( $on_handlers as $name ) { +\t\t\t$tags->remove_attribute( $name ); +\t\t} +\t} +} + +echo $tags->get_updated_html();'''))), + ('Stamp a CSP nonce on inline scripts and styles', + '

Content Security Policy in nonce- mode requires every inline <script> and <style> to carry a matching nonce attribute. Tag-by-tag is exactly the right granularity.

', + ('csp-nonce.php', php('''$nonce = bin2hex( random_bytes( 8 ) ); + +$html = '' +\t. ''; + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag() ) { +\t$tag = $tags->get_tag(); +\tif ( ( 'SCRIPT' === $tag || 'STYLE' === $tag ) && ! $tags->is_tag_closer() ) { +\t\t$tags->set_attribute( 'nonce', $nonce ); +\t} +} + +echo "nonce: {$nonce}\\n\\n"; +echo $tags->get_updated_html();'''))), + ('Build a srcset from a single src', + '

Generate responsive image markup at render time without touching the editor data model. Read the existing src, derive a srcset with width descriptors, add a sizes hint.

', + ('srcset-rewrite.php', php('''$html = '
Sunset
'; +$widths = array( 480, 768, 1200 ); + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag( 'img' ) ) { +\t$src = $tags->get_attribute( 'src' ); +\tif ( null === $src || $tags->get_attribute( 'srcset' ) !== null ) { +\t\tcontinue; +\t} +\t$variants = array(); +\tforeach ( $widths as $w ) { +\t\t$variants[] = $src . '?w=' . $w . ' ' . $w . 'w'; +\t} +\t$tags->set_attribute( 'srcset', implode( ', ', $variants ) ); +\t$tags->set_attribute( 'sizes', '(max-width: 768px) 100vw, 768px' ); +} + +echo $tags->get_updated_html();'''))), + ('Decode HTML entities the way the spec demands', + '

The HTML5 entity table has roughly 2,200 named references and a long list of edge cases. WP_HTML_Decoder implements the algorithm — don\'t roll your own.

', + ('decode-entities.php', php('''echo "attribute: " . WP_HTML_Decoder::decode_attribute( 'path?a=1&b=2&copy' ) . "\\n"; +echo "text: " . WP_HTML_Decoder::decode_text_node( 'AT&T — 100% 😀' ) . "\\n"; + +// Safe URL prefix check that respects encoded colons (a classic XSS vector). +$is_javascript = WP_HTML_Decoder::attribute_starts_with( +\t'java script:alert(1)', +\t'javascript:', +\t'ascii-case-insensitive' +); +var_dump( $is_javascript );'''))), + ('Find images by ancestry with breadcrumbs', + '

The full WP_HTML_Processor understands HTML5 tree construction, so you can ask "find every <img> directly inside a <figure>" without writing your own DOM walker.

', + ('breadcrumbs.php', php('''$html = '
' +\t. '
Hero
Hero shot
' +\t. '

Body copy mid-paragraph.

' +\t. '
Diagram
' +\t. '
'; + +$p = WP_HTML_Processor::create_fragment( $html ); +$figure_images = 0; +while ( $p->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG' ) ) ) ) { +\t$p->add_class( 'figure-image' ); +\t$figure_images++; +} + +echo "found {$figure_images} figure images\\n"; +echo $p->get_updated_html();'''))), + ('Outline a document by walking tokens with depth', + '

The full processor exposes get_current_depth() and get_breadcrumbs(). Combine with next_token() to print a structural outline.

', + ('outline.php', php('''$html = '

Title

' +\t. '

Chapter 1

Body

' +\t. '

Chapter 2

More body

' +\t. '
'; + +$p = WP_HTML_Processor::create_fragment( $html ); +while ( $p->next_token() ) { +\tif ( '#tag' !== $p->get_token_type() || $p->is_tag_closer() ) { +\t\tcontinue; +\t} +\t$tag = $p->get_tag(); +\tif ( ! preg_match( '/^H[1-6]$/', $tag ) ) { +\t\tcontinue; +\t} +\t$indent = str_repeat( ' ', max( 0, $p->get_current_depth() - 2 ) ); +\t$text = ''; +\twhile ( $p->next_token() ) { +\t\tif ( '#text' === $p->get_token_type() ) { +\t\t\t$text .= $p->get_modifiable_text(); +\t\t\tcontinue; +\t\t} +\t\tif ( '#tag' === $p->get_token_type() && $tag === $p->get_tag() && $p->is_tag_closer() ) { +\t\t\tbreak; +\t\t} +\t} +\techo "{$indent}{$tag} {$text}\\n"; +}'''))), + ('Bookmarks: annotate a parent based on its children', + '

Bookmarks are the one escape from forward-only scanning. Save a position, scan ahead, decide what to do, then seek() back and rewrite the earlier tag.

', + ('bookmarks.php', php('''$html = ''; + +$tags = new WP_HTML_Tag_Processor( $html ); +$tags->next_tag( 'ul' ); +$tags->set_bookmark( 'list' ); + +$total = 0; +$done = 0; +while ( $tags->next_tag( 'input' ) ) { +\t$total++; +\tif ( null !== $tags->get_attribute( 'checked' ) ) { +\t\t$done++; +\t} +} + +$tags->seek( 'list' ); +$tags->set_attribute( 'data-progress', $done . '/' . $total ); +$tags->release_bookmark( 'list' ); + +echo $tags->get_updated_html();'''))), + ])) + +# =========================================================================== +# Zip +# =========================================================================== +COMPONENTS.append(('zip', 'Zip', + 'Read and write ZIP archives in pure PHP — no libzip, no ZipArchive. Streams entries one at a time, so you can build EPUBs, .docx files, and multi-gigabyte plugin bundles without buffering the archive in memory.', + 'wp-php-toolkit/zip', + [ + ('Why this exists', + '

Common PHP ZIP workflows rely on the ZipArchive extension or shelling out to zip. Those are awkward in hosts without libzip, WebAssembly builds, and code paths that need to stream archive data through toolkit byte streams.

' + '

The Zip component reads and writes Stored and Deflate archives in pure PHP. The decoder is pull-based, so listing the central directory of a 2 GB ZIP costs roughly the size of the directory itself. The encoder accepts any ByteWriteStream as a sink and writes one entry at a time.

', + None), + ('Read a file out of a ZIP', + '

ZipFilesystem implements this toolkit\'s Filesystem interface, so once you wrap the byte reader you can call get_contents(), ls(), and is_dir() just like the other filesystem backends.

' + '

Try this: after Run, add a second append_file() call for a notes.md entry before $enc->close(), then call print_r( $zip->ls( \'/\' ) ) at the end. The directory listing reflects the new entry without re-reading the file.

', + ('teaser-read.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\FileReadStream; +use WordPress\\ByteStream\\WriteStream\\FileWriteStream; +use WordPress\\Zip\\FileEntry; +use WordPress\\Zip\\ZipDecoder; +use WordPress\\Zip\\ZipEncoder; +use WordPress\\Zip\\ZipFilesystem; + +$path = tempnam( sys_get_temp_dir(), 'demo' ) . '.zip'; +$out = FileWriteStream::from_path( $path, 'truncate' ); +$enc = new ZipEncoder( $out ); +$enc->append_file( new FileEntry( array( +\t'path' => 'readme.txt', +\t'compression_method' => ZipDecoder::COMPRESSION_NONE, +\t'body_reader' => new MemoryPipe( 'Hello from inside the zip.' ), +) ) ); +$enc->close(); +$out->close_writing(); + +$zip = ZipFilesystem::create( FileReadStream::from_path( $path ) ); +echo $zip->get_contents( 'readme.txt' );'''))), + ('Build an EPUB from scratch', + '

An EPUB follows one strict ZIP rule: write the mimetype entry first and store it without compression. Deflate the rest of the archive normally.

' + '

Gotcha: e-readers reject EPUBs whose mimetype entry has compression. Use COMPRESSION_NONE for that single entry.

', + ('epub.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\FileReadStream; +use WordPress\\ByteStream\\WriteStream\\FileWriteStream; +use WordPress\\Zip\\FileEntry; +use WordPress\\Zip\\ZipDecoder; +use WordPress\\Zip\\ZipEncoder; +use WordPress\\Zip\\ZipFilesystem; + +$path = tempnam( sys_get_temp_dir(), 'book' ) . '.epub'; +$out = FileWriteStream::from_path( $path, 'truncate' ); +$enc = new ZipEncoder( $out ); + +// 1) The mimetype entry MUST be first and stored uncompressed. +$enc->append_file( new FileEntry( array( +\t'path' => 'mimetype', +\t'compression_method' => ZipDecoder::COMPRESSION_NONE, +\t'body_reader' => new MemoryPipe( 'application/epub+zip' ), +) ) ); + +$container = '' +\t. '' +\t. '' +\t. ''; + +foreach ( array( +\t'META-INF/container.xml' => $container, +\t'EPUB/package.opf' => '', +\t'EPUB/chapter1.xhtml' => '

Chapter 1

It was a dark and stormy night.

', +) as $name => $body ) { +\t$enc->append_file( new FileEntry( array( +\t\t'path' => $name, +\t\t'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, +\t\t'body_reader' => new MemoryPipe( $body ), +\t) ) ); +} +$enc->close(); +$out->close_writing(); + +$zip = ZipFilesystem::create( FileReadStream::from_path( $path ) ); +printf( "mimetype: %s\\n", $zip->get_contents( 'mimetype' ) ); +printf( "size on disk: %d bytes\\n", filesize( $path ) );'''))), + ('Stream a large entry without buffering it', + '

Calling get_contents() on a 500 MB CSV inside a ZIP would eat 500 MB of RAM. Use open_read_stream() instead and inflate as you go.

' + '

Gotcha: only one entry stream can be open at a time. Drain or finish the previous stream before opening the next.

', + ('stream-large.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\FileReadStream; +use WordPress\\ByteStream\\WriteStream\\FileWriteStream; +use WordPress\\Zip\\FileEntry; +use WordPress\\Zip\\ZipDecoder; +use WordPress\\Zip\\ZipEncoder; +use WordPress\\Zip\\ZipFilesystem; + +$path = tempnam( sys_get_temp_dir(), 'big' ) . '.zip'; +$out = FileWriteStream::from_path( $path, 'truncate' ); +$enc = new ZipEncoder( $out ); +$enc->append_file( new FileEntry( array( +\t'path' => 'data.csv', +\t'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, +\t'body_reader' => new MemoryPipe( str_repeat( "id,value,timestamp\\n1,foo,2024\\n2,bar,2024\\n", 5000 ) ), +) ) ); +$enc->close(); +$out->close_writing(); + +$zip = ZipFilesystem::create( FileReadStream::from_path( $path ) ); +$stream = $zip->open_read_stream( 'data.csv' ); + +$rows = 0; +$bytes = 0; +$tail = ''; +while ( ! $stream->reached_end_of_data() ) { +\t$n = $stream->pull( 8192 ); +\tif ( 0 === $n ) break; +\t$chunk = $tail . $stream->consume( $n ); +\t$lines = explode( "\\n", $chunk ); +\t$tail = array_pop( $lines ); +\t$rows += count( $lines ); +\t$bytes += $n; +} +printf( "Inflated %d bytes in 8 KB chunks, parsed %d rows.\\n", $bytes, $rows );'''))), + ('Repack: modify one file, copy the rest', + '

Updating one file in a ZIP without rewriting the others is impossible at the format level — the central directory points at byte offsets. The pragmatic answer is repack: stream the source archive into a new one, swapping the file you care about.

', + ('repack.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\FileReadStream; +use WordPress\\ByteStream\\WriteStream\\FileWriteStream; +use WordPress\\Zip\\FileEntry; +use WordPress\\Zip\\ZipDecoder; +use WordPress\\Zip\\ZipEncoder; +use WordPress\\Zip\\ZipFilesystem; + +$src_path = tempnam( sys_get_temp_dir(), 'orig' ) . '.zip'; +$src_out = FileWriteStream::from_path( $src_path, 'truncate' ); +$src_enc = new ZipEncoder( $src_out ); +foreach ( array( +\t'config.json' => '{"debug":false,"version":"1.0"}', +\t'app/index.php' => ' 'body{color:#333}', +) as $name => $body ) { +\t$src_enc->append_file( new FileEntry( array( +\t\t'path' => $name, +\t\t'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, +\t\t'body_reader' => new MemoryPipe( $body ), +\t) ) ); +} +$src_enc->close(); +$src_out->close_writing(); + +$source = ZipFilesystem::create( FileReadStream::from_path( $src_path ) ); +$dst_path = tempnam( sys_get_temp_dir(), 'repacked' ) . '.zip'; +$dst_out = FileWriteStream::from_path( $dst_path, 'truncate' ); +$dst_enc = new ZipEncoder( $dst_out ); + +$dirs = array( '/' ); +while ( $dirs ) { +\t$dir = array_shift( $dirs ); +\tforeach ( $source->ls( $dir ) as $name ) { +\t\t$path = rtrim( $dir, '/' ) . '/' . $name; +\t\tif ( $source->is_dir( $path ) ) { +\t\t\t$dirs[] = $path; +\t\t\tcontinue; +\t\t} +\t\t$rel = ltrim( $path, '/' ); +\t\t$body = ( 'config.json' === $rel ) +\t\t\t? '{"debug":true,"version":"1.0.1"}' +\t\t\t: $source->get_contents( $rel ); +\t\t$dst_enc->append_file( new FileEntry( array( +\t\t\t'path' => $rel, +\t\t\t'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, +\t\t\t'body_reader' => new MemoryPipe( $body ), +\t\t) ) ); +\t} +} +$dst_enc->close(); +$dst_out->close_writing(); + +$repacked = ZipFilesystem::create( FileReadStream::from_path( $dst_path ) ); +echo "new config.json: " . $repacked->get_contents( 'config.json' ) . "\\n"; +echo "untouched: " . 
$repacked->get_contents( 'app/index.php' ) . "\\n";'''))), + ('Defend against zip-slip', + '

A malicious archive can name an entry ../../etc/passwd and trick a naive extractor into clobbering files outside the destination. ZipDecoder::sanitize_path() strips leading ../ segments and collapses internal /../ sequences before exposing the path.

', + ('zip-slip.php', php('''use WordPress\\Zip\\ZipDecoder; + +$evil_inputs = array( +\t'../../etc/passwd', +\t'./safe/path.txt', +\t'a/../../b/secret', +\t'a//b///c.txt', +\t'../../../../root/.ssh/authorized_keys', +); +foreach ( $evil_inputs as $name ) { +\tprintf( "%-45s => %s\\n", $name, ZipDecoder::sanitize_path( $name ) ); +}'''))), + ('Pipe ZIP entries into an InMemoryFilesystem', + '

Real-world recipe: take an uploaded plugin ZIP, expand it into an InMemoryFilesystem so you can validate, edit, or scan it before it ever touches disk. Three components compose into something you couldn\'t build with ZipArchive alone.

', + ('zip-to-memfs.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\FileReadStream; +use WordPress\\ByteStream\\WriteStream\\FileWriteStream; +use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Zip\\FileEntry; +use WordPress\\Zip\\ZipDecoder; +use WordPress\\Zip\\ZipEncoder; +use WordPress\\Zip\\ZipFilesystem; +use function WordPress\\Filesystem\\copy_between_filesystems; + +$path = tempnam( sys_get_temp_dir(), 'app' ) . '.zip'; +$out = FileWriteStream::from_path( $path, 'truncate' ); +$enc = new ZipEncoder( $out ); +foreach ( array( +\t'app/index.php' => ' ' 'body{margin:0}', +\t'app/README.md' => '# App', +) as $name => $body ) { +\t$enc->append_file( new FileEntry( array( +\t\t'path' => $name, +\t\t'compression_method' => ZipDecoder::COMPRESSION_DEFLATE, +\t\t'body_reader' => new MemoryPipe( $body ), +\t) ) ); +} +$enc->close(); +$out->close_writing(); + +$zip = ZipFilesystem::create( FileReadStream::from_path( $path ) ); +$mem = InMemoryFilesystem::create(); +copy_between_filesystems( array( +\t'source_filesystem' => $zip, +\t'source_path' => '/', +\t'target_filesystem' => $mem, +\t'target_path' => '/', +) ); + +$mem->put_contents( '/app/VERSION', '1.0.0' ); +echo "files now in memory:\\n"; +$dirs = array( '/' ); +$files = array(); +while ( $dirs ) { +\t$dir = array_shift( $dirs ); +\tforeach ( $mem->ls( $dir ) as $name ) { +\t\t$p = rtrim( $dir, '/' ) . '/' . $name; +\t\tif ( $mem->is_dir( $p ) ) { +\t\t\t$dirs[] = $p; +\t\t\tcontinue; +\t\t} +\t\t$files[] = $p; +\t} +} +sort( $files ); +foreach ( $files as $path ) { +\techo " " . $path . "\\n"; +}'''))), + ])) + +# =========================================================================== +# ByteStream +# =========================================================================== +COMPONENTS.append(('bytestream', 'ByteStream', + 'Composable streaming primitives for reading, writing, transforming, hashing, and compressing byte data. 
Pull/peek/consume semantics let parsers backtrack without copying, and deflate, inflate, and checksum filters snap together like Lego.', + 'wp-php-toolkit/bytestream', + [ + ('Why this exists', + '

PHP\'s native streams are powerful but inconsistent. fread on a socket may return short reads with no warning; stream_filter_append is awkward to compose; gzip helpers and file handles expose different APIs. The ByteStream component normalizes these behind one small interface — pull / peek / consume — so a parser, a hash function, and a deflate filter all see the same shape.

' + '

The split between pull (buffer up to N bytes) and consume (advance past N bytes) is the secret. Parsers can peek ahead to detect a record boundary and decide whether to consume, without copying or allocating.

', + None), + ('Read a file in chunks', + '

The canonical loop. pull(N) reads up to N bytes from the underlying source into an internal buffer and returns how many ended up there; consume(N) reads N bytes from that buffer and advances past them. The buffer never grows beyond the chunk size you ask for.

', + ('teaser-read.php', php('''use WordPress\\ByteStream\\ReadStream\\FileReadStream; + +$path = tempnam( sys_get_temp_dir(), 'demo' ); +file_put_contents( $path, str_repeat( "log line\\n", 200 ) ); + +$reader = FileReadStream::from_path( $path ); +$total = 0; +while ( ! $reader->reached_end_of_data() ) { +\t$n = $reader->pull( 256 ); +\tif ( 0 === $n ) break; +\t$total += strlen( $reader->consume( $n ) ); +} +$reader->close_reading(); +echo "Read {$total} bytes in 256-byte chunks.\\n";'''))), + ('MemoryPipe as write-then-read buffer', + '

MemoryPipe is bidirectional: you append_bytes() as a writer and pull/consume as a reader. Easiest way to wire one component\'s output into another\'s input.

' + '

Gotcha: a producer must call close_writing() when done — otherwise the consumer eventually throws NotEnoughDataException instead of seeing EOF.

', + ('memory-pipe.php', php('''use WordPress\\ByteStream\\MemoryPipe; + +$pipe = new MemoryPipe(); +$pipe->append_bytes( "first chunk\\n" ); +$pipe->append_bytes( "second chunk\\n" ); +$pipe->append_bytes( "third chunk\\n" ); +$pipe->close_writing(); + +while ( ! $pipe->reached_end_of_data() ) { +\t$n = $pipe->pull( 1024 ); +\tif ( 0 === $n ) break; +\techo "got: " . $pipe->consume( $n ); +}'''))), + ('Compress on the way in, decompress on the way out', + '

Wrap a stream in DeflateReadStream to get compressed bytes out; wrap it in InflateReadStream to get decompressed bytes out. Both are full ByteReadStream implementations, so they nest into anything else that takes a stream.

', + ('deflate-roundtrip.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\DeflateReadStream; +use WordPress\\ByteStream\\ReadStream\\InflateReadStream; + +$original = str_repeat( "the quick brown fox. ", 50 ); + +$src = new MemoryPipe( $original ); +$src->close_writing(); +$deflated = new DeflateReadStream( $src, ZLIB_ENCODING_DEFLATE ); +$compressed = $deflated->consume_all(); + +$src2 = new MemoryPipe( $compressed ); +$src2->close_writing(); +$inflated = new InflateReadStream( $src2, ZLIB_ENCODING_DEFLATE ); +$round = $inflated->consume_all(); + +printf( "original : %d bytes\\n", strlen( $original ) ); +printf( "deflated : %d bytes (%.1f%%)\\n", strlen( $compressed ), 100 * strlen( $compressed ) / strlen( $original ) ); +printf( "round-trip: %s\\n", $round === $original ? 'OK' : 'BROKEN' );'''))), + ('Line-by-line reads from a chunked source', + '

Reading text by line means handling chunk boundaries that fall mid-line. Keep the trailing partial line and prepend it to the next pull. The rest of the loop pretends the data was always whole.

', + ('lines.php', php('''use WordPress\\ByteStream\\MemoryPipe; + +$pipe = new MemoryPipe(); +$pipe->append_bytes( "alpha\\nbravo\\ncharl" ); +$pipe->append_bytes( "ie\\ndelta\\necho\\n" ); +$pipe->close_writing(); + +$tail = ''; +$count = 0; +while ( ! $pipe->reached_end_of_data() ) { +\t$n = $pipe->pull( 8 ); +\tif ( 0 === $n ) break; +\t$buf = $tail . $pipe->consume( $n ); +\t$lines = explode( "\\n", $buf ); +\t$tail = array_pop( $lines ); +\tforeach ( $lines as $line ) { +\t\tprintf( "[%d] %s\\n", ++$count, $line ); +\t} +} +if ( '' !== $tail ) { +\tprintf( "[%d] %s\\n", ++$count, $tail ); +}'''))), + ('Limit a stream to a fixed window', + '

LimitedByteReadStream exposes only the next N bytes of an underlying stream as if those were the entire stream. This is how the ZIP decoder hands you the body of one entry without letting you read into the next.

', + ('limited.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\ByteStream\\ReadStream\\LimitedByteReadStream; + +$source = new MemoryPipe( "HEADER:42|BODY:hello there|FOOTER:done" ); +$source->close_writing(); + +$source->pull( 10 ); +$source->consume( 10 ); + +$body = new LimitedByteReadStream( $source, 16 ); +echo "body sees: " . $body->consume_all() . "\\n"; +echo "remaining in source: " . $source->consume_all() . "\\n";'''))), + ])) + +# =========================================================================== +# Filesystem +# =========================================================================== +COMPONENTS.append(('filesystem', 'Filesystem', + 'One Filesystem interface across local disk, in-memory trees, SQLite databases, and ZIP archives. Forward-slash paths everywhere — even on Windows — so the same code runs in tests, in production, and inside read-only ZIPs.', + 'wp-php-toolkit/filesystem', + [ + ('Why this exists', + '

Code that touches the filesystem is hard to test, hard to port to Windows, and impossible to point at non-disk storage without rewriting it. Swap LocalFilesystem for InMemoryFilesystem in tests and your suite stops touching /tmp; swap it for SQLiteFilesystem and your "files" become rows in a portable database; swap it for ZipFilesystem and you can read inside an archive with the same calls.

' + '

Every backend uses forward slashes regardless of host OS. No DIRECTORY_SEPARATOR juggling, no Windows-only test failures, no surprises when a path moves between backends.

', + None), + ('In-memory tree', + '

The fastest backend. No disk I/O, no cleanup, no test-isolation problems.

', + ('teaser-memory.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; + +$fs = InMemoryFilesystem::create(); +$fs->put_contents( '/hello.txt', 'Hello, world!' ); +echo $fs->get_contents( '/hello.txt' );'''))), + ('Test code without touching disk', + '

Code that takes a Filesystem parameter, instead of calling file_get_contents() directly, can be tested against an InMemoryFilesystem. The test sets up files in memory, exercises the function, and asserts on what got written — no temp directories, no cleanup.

', + ('test-without-disk.php', php('''use WordPress\\Filesystem\\Filesystem; +use WordPress\\Filesystem\\InMemoryFilesystem; + +function bump_version( Filesystem $fs, $path ) { +\t$json = json_decode( $fs->get_contents( $path ), true ); +\tlist( $maj, $min, $patch ) = explode( '.', $json['version'] ); +\t$json['version'] = $maj . '.' . $min . '.' . ( (int) $patch + 1 ); +\t$fs->put_contents( $path, json_encode( $json ) ); +} + +$fs = InMemoryFilesystem::create(); +$fs->put_contents( '/package.json', '{"version":"1.2.3"}' ); +bump_version( $fs, '/package.json' ); + +echo $fs->get_contents( '/package.json' ) . "\\n";'''))), + ('Local disk with a chrooted root', + '

LocalFilesystem::create($root) is implicitly chrooted: every path resolves relative to $root and a ../ cannot escape. Reach for it when a request path or CLI argument names a file inside one project directory.

', + ('local-chroot.php', php('''use WordPress\\Filesystem\\LocalFilesystem; + +$root = sys_get_temp_dir() . '/toolkit-' . uniqid(); +$fs = LocalFilesystem::create( $root ); + +$fs->mkdir( '/uploads', array( 'recursive' => true ) ); +$fs->put_contents( '/uploads/note.txt', 'Hi from local disk.' ); + +echo $fs->get_contents( '/uploads/../uploads/note.txt' ) . "\\n"; + +$fs->rmdir( '/', array( 'recursive' => true ) ); +echo "exists after cleanup? " . ( is_dir( $root ) ? 'yes' : 'no' ) . "\\n";'''))), + ('SQLite as a portable file store', + '

The whole tree lives in one SQLite database file. Use it for self-contained scratch storage that survives process boundaries without leaving loose files behind.

', + ('sqlite.php', php('''use WordPress\\Filesystem\\SQLiteFilesystem; + +$fs = SQLiteFilesystem::create( ':memory:' ); +$fs->mkdir( '/posts', array( 'recursive' => true ) ); +for ( $i = 1; $i <= 3; $i++ ) { +\t$fs->put_contents( "/posts/post-{$i}.md", "# Post {$i}\\n\\nBody {$i}." ); +} + +foreach ( $fs->ls( '/posts' ) as $name ) { +\t$first = strtok( $fs->get_contents( '/posts/' . $name ), "\\n" ); +\techo "{$name}: {$first}\\n"; +}'''))), + ('Copy a tree across backends', + '

The killer composability move: copy_between_filesystems() streams files chunk-by-chunk from any source to any target. Pull a ZIP into SQLite, snapshot SQLite to disk, mirror disk into RAM — all the same call.

', + ('cross-backend-copy.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Filesystem\\LocalFilesystem; +use WordPress\\Filesystem\\SQLiteFilesystem; +use function WordPress\\Filesystem\\copy_between_filesystems; + +$root = sys_get_temp_dir() . '/copytree-' . uniqid(); +$local = LocalFilesystem::create( $root ); +$local->mkdir( '/site/posts', array( 'recursive' => true ) ); +$local->put_contents( '/site/posts/2024-01.md', '# Hello 2024' ); +$local->put_contents( '/site/index.html', '

Home

' ); + +$sqlite = SQLiteFilesystem::create( ':memory:' ); +copy_between_filesystems( array( +\t'source_filesystem' => $local, +\t'source_path' => '/site', +\t'target_filesystem' => $sqlite, +\t'target_path' => '/snapshot', +) ); + +$mem = InMemoryFilesystem::create(); +copy_between_filesystems( array( +\t'source_filesystem' => $sqlite, +\t'source_path' => '/snapshot', +\t'target_filesystem' => $mem, +\t'target_path' => '/copy', +) ); + +echo "in memory after two copies:\\n"; +echo " posts: " . implode( ', ', $mem->ls( '/copy/posts' ) ) . "\\n"; +echo " index: " . $mem->get_contents( '/copy/index.html' ) . "\\n"; + +$local->rmdir( '/', array( 'recursive' => true ) );'''))), + ('Atomic write via tempfile rename', + '

Write to a sibling tempfile, then rename — that\'s how you avoid leaving a half-written file on crash. rename() is atomic within a single filesystem.

', + ('atomic-write.php', php('''use WordPress\\Filesystem\\Filesystem; +use WordPress\\Filesystem\\LocalFilesystem; + +function atomic_put_contents( Filesystem $fs, $path, $bytes ) { +\t$tmp = $path . '.tmp.' . bin2hex( random_bytes( 4 ) ); +\t$fs->put_contents( $tmp, $bytes ); +\t$fs->rename( $tmp, $path ); +} + +$root = sys_get_temp_dir() . '/atomic-' . uniqid(); +$fs = LocalFilesystem::create( $root ); + +$fs->put_contents( '/config.json', '{"v":1}' ); +atomic_put_contents( $fs, '/config.json', '{"v":2}' ); + +echo "config: " . $fs->get_contents( '/config.json' ) . "\\n"; +echo "no .tmp leftovers: " . count( $fs->ls( '/' ) ) . " entries in root\\n"; + +$fs->rmdir( '/', array( 'recursive' => true ) );'''))), + ('Path helpers that behave the same on Windows', + '

Unix path semantics apply on every host OS. This matters for abstract paths such as a SQLite key or a ZIP entry name because those paths do not live on a real drive.

', + ('path-helpers.php', php('''use function WordPress\\Filesystem\\wp_join_unix_paths; +use function WordPress\\Filesystem\\wp_unix_dirname; +use function WordPress\\Filesystem\\wp_unix_path_resolve_dots; + +echo wp_join_unix_paths( '/var/www', '/site/', '/index.php' ) . "\\n"; +echo wp_unix_dirname( '/a/b/c/d.txt', 2 ) . "\\n"; +echo wp_unix_path_resolve_dots( '/a/b/../c/./d/../e' ) . "\\n";'''))), + ])) + +# =========================================================================== +# BlockParser +# =========================================================================== +COMPONENTS.append(('blockparser', 'BlockParser', + 'WordPress core\'s block parser, packaged as a standalone library. Turn block markup into a structured tree, lint posts for common authoring mistakes, and audit block usage — all without booting WordPress.', + 'wp-php-toolkit/blockparser', + [ + ('Why this exists', + '

Block markup is not plain HTML. A post can contain HTML comments that identify blocks, JSON attributes inside those comments, freeform HTML between blocks, and nested blocks whose rendered HTML is interleaved with parent markup.

' + '

This component packages WordPress core\'s block parser so importers, linters, migration tools, and static analyzers can understand block content without loading WordPress. It deliberately mirrors core behavior — same array shape, same null blocks for freeform HTML, same core block names such as core/paragraph — so code written against this parser keeps working when run inside WordPress, and vice versa.

' + '

Reach for it when you need answers about the block tree: which blocks a post uses, which attributes they carry, where nested blocks appear, or whether content violates a rule your project cares about.

', + None), + ('What you get back', + '

WP_Block_Parser::parse() returns an array of blocks. Each block is an associative array with five keys: blockName, attrs, innerBlocks, innerHTML, and innerContent.

' + '

innerHTML is the HTML inside the block with inner blocks stripped out. innerContent is the interleaved version: an array of HTML strings with null placeholders marking where each inner block belongs.

' + '

Most code starts by checking blockName, then reading attrs or innerHTML. When a post has container blocks such as Group, Columns, or Navigation, look inside innerBlocks too.

' + '

Footgun: freeform HTML between blocks shows up as a block with blockName === null. Always skip that case before comparing names.

', + None), + ('Parse a document', + '

The simplest possible use. Pass a string, get back a tree.

', + ('parse.php', php('''$document = "\\n

Welcome

\\n\\n\\n" +\t. "\\n

Hello from the block editor.

\\n"; + +$blocks = ( new WP_Block_Parser() )->parse( $document ); +foreach ( $blocks as $block ) { +\tif ( null === $block['blockName'] ) { +\t\tcontinue; +\t} +\techo $block['blockName'] . ': ' . trim( strip_tags( $block['innerHTML'] ) ) . "\\n"; +}'''))), + ('Count every block type in a post', + '

A common audit task: "How many Paragraph, Image, and Gallery blocks does this post use?" A small queue keeps the example readable while still visiting nested blocks.

', + ('count-blocks.php', php('''$document = "
" +\t. "

Title

" +\t. "

One.

" +\t. "

Two.

" +\t. "
" +\t. "
"; + +$blocks = ( new WP_Block_Parser() )->parse( $document ); + +$counts = array(); +$queue = $blocks; + +while ( ! empty( $queue ) ) { +\t$block = array_shift( $queue ); + +\tif ( null !== $block['blockName'] ) { +\t\t$name = $block['blockName']; +\t\t$counts[ $name ] = isset( $counts[ $name ] ) ? $counts[ $name ] + 1 : 1; +\t} + +\tforeach ( $block['innerBlocks'] as $inner_block ) { +\t\t$queue[] = $inner_block; +\t} +} + +arsort( $counts ); +foreach ( $counts as $name => $n ) { +\techo str_pad( (string) $n, 4, ' ', STR_PAD_LEFT ) . ' ' . $name . "\\n"; +}'''))), + ('Check whether a post uses a block', + '

Useful for templates, audits, and migrations: answer one yes/no question without caring where the block appears in the tree.

', + ('has-block.php', php('''$document = "
" +\t. "
" +\t. "" +\t. "
" +\t. "
"; + +$blocks = ( new WP_Block_Parser() )->parse( $document ); + +function post_has_block( $blocks, $name ) { +\t$queue = $blocks; + +\twhile ( ! empty( $queue ) ) { +\t\t$block = array_shift( $queue ); +\t\tif ( $name === $block['blockName'] ) { +\t\t\treturn true; +\t\t} + +\t\tforeach ( $block['innerBlocks'] as $inner_block ) { +\t\t\t$queue[] = $inner_block; +\t\t} +\t} + +\treturn false; +} + +echo post_has_block( $blocks, 'core/button' ) ? "has button\\n" : "missing button\\n"; +echo post_has_block( $blocks, 'core/gallery' ) ? "has gallery\\n" : "missing gallery\\n";'''))), + ('Lint headings for hierarchy mistakes', + '

"Don\'t skip from H2 to H4" is a real accessibility rule. The helper below keeps headings in document order, including headings nested inside Group, Column, and Cover blocks.

', + ('lint-headings.php', php('''$document = "\\n

Intro

\\n" +\t. "\\n

Subsection

\\n" +\t. "\\n

Body

\\n"; + +$blocks = ( new WP_Block_Parser() )->parse( $document ); + +function collect_headings( $blocks, &$headings ) { +\tforeach ( $blocks as $block ) { +\t\tif ( 'core/heading' === $block['blockName'] ) { +\t\t\t$headings[] = array( +\t\t\t\t'level' => isset( $block['attrs']['level'] ) ? (int) $block['attrs']['level'] : 2, +\t\t\t\t'text' => trim( strip_tags( $block['innerHTML'] ) ), +\t\t\t); +\t\t} + +\t\tcollect_headings( $block['innerBlocks'], $headings ); +\t} +} + +$headings = array(); +collect_headings( $blocks, $headings ); + +$last = 1; +foreach ( $headings as $heading ) { +\t$level = $heading['level']; +\t$label = $heading['text']; + +\tif ( $level > $last + 1 ) { +\t\techo "WARN {$label}: jumped from H{$last} to H{$level}\\n"; +\t} else { +\t\techo "ok {$label}: H{$level}\\n"; +\t} +\t$last = $level; +}'''))), + ('Find all instances of a custom block', + '

When auditing an export for a block your plugin owns, collect every match and print the fields a human cares about.

', + ('find-custom-block.php', php('''$document = "

Reviews

" +\t. "" +\t. "
Loved it.
" +\t. "" +\t. "" +\t. "
Pretty good.
" +\t. ""; + +$blocks = ( new WP_Block_Parser() )->parse( $document ); + +function find_blocks_by_name( $blocks, $name, &$matches ) { +\tforeach ( $blocks as $block ) { +\t\tif ( $name === $block['blockName'] ) { +\t\t\t$matches[] = $block; +\t\t} + +\t\tfind_blocks_by_name( $block['innerBlocks'], $name, $matches ); +\t} +} + +$testimonials = array(); +find_blocks_by_name( $blocks, 'my-plugin/testimonial', $testimonials ); + +foreach ( $testimonials as $i => $b ) { +\techo ( $i + 1 ) . '. ' . $b['attrs']['author'] . ' (' . $b['attrs']['rating'] . '/5): ' +\t\t. trim( strip_tags( $b['innerHTML'] ) ) . "\\n"; +}'''))), + ('Detect blocks with stale embed URLs', + '

A real-world content audit: find every core/embed whose URL points at a domain you have retired.

', + ('audit-embeds.php', php('''$document = '' +\t. '' +\t. ''; + +$retired = array( 'vine.co', 'plus.google.com' ); + +foreach ( ( new WP_Block_Parser() )->parse( $document ) as $b ) { +\tif ( 'core/embed' !== $b['blockName'] ) { +\t\tcontinue; +\t} +\t$url = isset( $b['attrs']['url'] ) ? $b['attrs']['url'] : ''; +\t$host = parse_url( $url, PHP_URL_HOST ); +\t$bad = $host && in_array( $host, $retired, true ); +\techo ( $bad ? 'STALE ' : 'ok ' ) . $url . "\\n"; +}'''))), + ])) + +# =========================================================================== +# Markdown +# =========================================================================== +COMPONENTS.append(('markdown', 'Markdown', + 'Bidirectional converter between Markdown and WordPress block markup. Useful for moving content between Markdown files and WordPress while preserving the structures both formats can express.', + 'wp-php-toolkit/markdown', + [ + ('Why this exists', + '

Many publishing workflows start in Markdown: documentation sites, static-site generators, Git-backed editorial workflows, Obsidian vaults, and developer notes. WordPress stores editor content as block markup. Moving between those worlds by string replacement loses metadata and quickly breaks on lists, tables, code blocks, and frontmatter.

' + '

The Markdown component provides a structured bridge. MarkdownConsumer turns Markdown plus frontmatter into block markup and metadata; MarkdownProducer turns supported block markup back into Markdown. The conversion is meant for practical content workflows, not byte-identical round-tripping of every custom block attribute.

', + None), + ('Markdown to blocks', + '

Feed Markdown into MarkdownConsumer, get block markup back. The result is a BlocksWithMetadata object that holds both the rendered blocks and any frontmatter parsed from the document.

', + ('quickstart.php', php('''use WordPress\\Markdown\\MarkdownConsumer; + +$result = ( new MarkdownConsumer( "# Hello\\n\\nWelcome to **WordPress**." ) )->consume(); +echo $result->get_block_markup();'''))), + ('Round-trip: blocks back to Markdown', + '

Pair MarkdownProducer with MarkdownConsumer to convert in either direction. Round-tripping is lossy for block attributes that have no Markdown representation (custom classes, alignment), so do not expect byte-perfect equality.

', + ('roundtrip.php', php('''use WordPress\\Markdown\\MarkdownConsumer; +use WordPress\\Markdown\\MarkdownProducer; + +$md = "## Round trip\\n\\n- one\\n- two\\n- three\\n"; +$blocks = ( new MarkdownConsumer( $md ) )->consume(); +$markdown = ( new MarkdownProducer( $blocks ) )->produce(); + +echo $markdown;'''))), + ('Reading YAML frontmatter as post meta', + '

Frontmatter keys come back as arrays so a single key can hold multiple values. Use get_meta_value() when you only want the first scalar.

', + ('frontmatter.php', php('''use WordPress\\Markdown\\MarkdownConsumer; + +$md = <<consume(); + +echo 'Title: ' . $consumer->get_meta_value( 'post_title' ) . "\\n"; +echo 'Status: ' . $consumer->get_meta_value( 'post_status' ) . "\\n"; +$metadata = $consumer->get_all_metadata(); +echo 'Tags: ' . implode( ', ', $metadata['tags'][0] ) . "\\n";'''))), + ('Migrating an Obsidian or Hugo folder of Markdown', + '

Walk a directory of .md files (Obsidian vault, Hugo content/, Jekyll _posts) and emit one block-markup record per file.

', + ('migrate-folder.php', php('''use WordPress\\Markdown\\MarkdownConsumer; + +@mkdir( '/tmp/vault', 0777, true ); +file_put_contents( '/tmp/vault/welcome.md', "---\\ntitle: Welcome\\n---\\n\\nHello world." ); +file_put_contents( '/tmp/vault/roadmap.md', "# Roadmap\\n\\n1. Ship\\n2. Iterate" ); + +foreach ( glob( '/tmp/vault/*.md' ) as $path ) { +\t$consumer = new MarkdownConsumer( file_get_contents( $path ) ); +\t$consumer->consume(); +\t$title = $consumer->get_meta_value( 'title' ); +\tif ( ! $title ) $title = basename( $path, '.md' ); +\techo "=== $title ($path) ===\\n"; +\techo substr( $consumer->get_block_markup(), 0, 120 ) . "...\\n\\n"; +}'''))), + ('Counting blocks produced by a Markdown document', + '

After conversion, the result is plain WordPress block markup, so parse_blocks() works on it directly. This is the standard way to inspect what the converter emitted before saving it to the database.

', + ('count-blocks.php', php('''use WordPress\\Markdown\\MarkdownConsumer; + +$md = << A quote. +MD; + +$blocks = ( new MarkdownConsumer( $md ) )->consume()->get_block_markup(); +$counts = array(); +$queue = parse_blocks( $blocks ); + +while ( $queue ) { +\t$block = array_shift( $queue ); +\tif ( null !== $block['blockName'] ) { +\t\t$name = $block['blockName']; +\t\t$counts[ $name ] = isset( $counts[ $name ] ) ? $counts[ $name ] + 1 : 1; +\t} +\tforeach ( $block['innerBlocks'] as $inner_block ) { +\t\t$queue[] = $inner_block; +\t} +} +foreach ( $counts as $name => $count ) { +\techo "{$name}: {$count}\\n"; +}'''))), + ])) + +# =========================================================================== +# XML +# =========================================================================== +COMPONENTS.append(('xml', 'XML', + 'A streaming, namespace-aware XML processor in pure PHP. Read and modify huge feeds, WXR exports, ePub manifests, and Office Open XML parts without ever loading the document into memory and without depending on libxml2.', + 'wp-php-toolkit/xml', + [ + ('Why this exists', + '

SimpleXMLElement and DOMDocument both need libxml2 and both build a complete in-memory tree. XMLProcessor walks the document forward as a cursor, keeps modifications in a side buffer, and emits the full updated XML with get_updated_xml() only when you ask for it.

' + '

This design came from WordPress-scale documents such as WXR exports. A migration may only need to rewrite wp:attachment_url values or bump a feed attribute, so the processor optimizes for targeted cursor edits instead of a full validating XML stack.

' + '

Footgun #1: namespace-aware methods use the namespace name declared in xmlns, not the prefix written in the tag. In WXR, get_attribute( \'wp\', \'status\' ) looks for a namespace literally named wp; for the usual WXR declaration you want get_attribute( \'http://wordpress.org/export/1.2/\', \'status\' ).

' + '

Footgun #2: in streaming mode next_tag() can return false because input ran out, not because the document ended. Check is_paused_at_incomplete_input() before assuming you\'re done.

', + None), + ('Bump every price in a catalog', + '

Find each <book>, read its price, write a new one, emit the updated document.

', + ('bump-prices.php', php('''use WordPress\\XML\\XMLProcessor; + +$xml = '' +\t. 'PHP Internals' +\t. 'WordPress at Scale' +\t. ''; + +$p = XMLProcessor::create_from_string( $xml ); +while ( $p->next_tag( 'book' ) ) { +\t$old = (float) $p->get_attribute( '', 'price' ); +\t$new = number_format( $old * 1.10, 2, '.', '' ); +\t$p->set_attribute( '', 'price', $new ); +} + +echo $p->get_updated_xml();'''))), + ('Read namespaced attributes from a WXR export', + '

WordPress\'s WXR commonly uses wp:, dc:, and content: prefixes bound to namespace names such as http://wordpress.org/export/1.2/. Pass that expanded namespace name, not the prefix; the processor handles whichever prefix the document actually uses.

', + ('wxr-namespaces.php', php('''use WordPress\\XML\\XMLProcessor; + +$wxr = '' +\t. '' +\t. '' +\t. 'Hello World' +\t. 'admin' +\t. '42' +\t. 'publish' +\t. ''; + +$WP = 'http://wordpress.org/export/1.2/'; +$DC = 'http://purl.org/dc/elements/1.1/'; + +$p = XMLProcessor::create_from_string( $wxr ); +while ( $p->next_tag( 'item' ) ) { +\twhile ( $p->next_token() ) { +\t\tif ( $p->is_tag_closer() && 'item' === $p->get_tag_local_name() ) break; +\t\tif ( ! $p->is_tag_opener() ) continue; +\t\t$ns = $p->get_tag_namespace(); +\t\t$local = $p->get_tag_local_name(); +\t\t$prefix = ( $WP === $ns ) ? 'wp/' : ( ( $DC === $ns ) ? 'dc/' : '' ); +\t\techo "{$prefix}{$local}: "; +\t\twhile ( $p->next_token() && '#text' !== $p->get_token_name() ) {} +\t\techo trim( $p->get_modifiable_text() ) . "\\n"; +\t} +}'''))), + ('Rewrite URLs across an entire WXR export', + '

Large WXR exports can hold many URLs in <link>, <guid>, and post content. Streaming the file lets you rewrite large exports without loading the whole XML document into memory.

', + ('rewrite-wxr-urls.php', php('''use WordPress\\XML\\XMLProcessor; + +$wxr = '' +\t. 'https://old.example.com' +\t. 'https://old.example.com/2024/post-1' +\t. 'https://old.example.com/?p=1' +\t. ''; + +$from = 'https://old.example.com'; +$to = 'https://new.example.com'; + +$p = XMLProcessor::create_from_string( $wxr ); +$rewritten = 0; + +while ( $p->next_token() ) { +\tif ( '#text' !== $p->get_token_name() ) continue; +\t$text = $p->get_modifiable_text(); +\tif ( false === strpos( $text, $from ) ) continue; +\t$p->set_modifiable_text( str_replace( $from, $to, $text ) ); +\t$rewritten++; +} + +echo "rewrote {$rewritten} text nodes\\n\\n"; +echo $p->get_updated_xml();'''))), + ('Parse OPML to extract feed URLs', + '

OPML is the format Feedly and many other readers use to import and export feed lists. It is flat, attribute-heavy XML — exactly what a tag processor handles best.

', + ('opml.php', php('''use WordPress\\XML\\XMLProcessor; + +$opml = 'My Feeds' +\t. '' +\t. '' +\t. '' +\t. '' +\t. ''; + +$p = XMLProcessor::create_from_string( $opml ); +while ( $p->next_tag( 'outline' ) ) { +\t$url = $p->get_attribute( '', 'xmlUrl' ); +\tif ( null === $url ) continue; +\techo $p->get_attribute( '', 'text' ) . "\\t" . $url . "\\n"; +}'''))), + ])) + +# =========================================================================== +# Encoding +# =========================================================================== +COMPONENTS.append(('encoding', 'Encoding', + 'UTF-8 validation and scrubbing with a pure-PHP fallback when mbstring is unavailable. Detects malformed bytes and replaces them per the Unicode maximal-subpart algorithm.', + 'wp-php-toolkit/encoding', + [ + ('Why this exists', + '

Every parser in this toolkit eventually has to decide what to do with text bytes. XML rejects malformed UTF-8. JSON and databases can fail late. CSS, HTML, WXR, and Blueprint validation all need consistent answers about whether a string is well-formed Unicode.

' + '

The Encoding component provides the small UTF-8 primitives the rest of the toolkit can share: validate bytes, scrub invalid sequences, scan code points, and detect Unicode noncharacters. When mbstring is available it can delegate to it; when it is not, the component uses its own byte scanner so behavior stays available in restricted PHP environments.

' + '

Historically, this became the common foundation for Blueprint validation and CSS/XML processing, replacing ad hoc Unicode helpers with the WordPress core UTF-8 routines used here.

', + None), + ('Validating UTF-8 before storing it', + '

wp_is_valid_utf8() rejects overlong sequences, surrogate halves, and stray ISO-8859-1 bytes. Use it as a guard in front of any code path that assumes UTF-8 (database, JSON, XML).

', + ('validate.php', php('''use function WordPress\\Encoding\\wp_is_valid_utf8; + +$samples = array( +\t'ASCII' => 'just a test', +\t'UTF-8 pencil' => "\\xE2\\x9C\\x8F", +\t'latin-1 byte' => "B\\xFCch", +\t'overlong slash' => "\\xC1\\xBF", +\t'surrogate half' => "\\xED\\xB0\\x80", +); + +foreach ( $samples as $label => $bytes ) { +\techo sprintf( "%-14s %s\\n", $label . ':', wp_is_valid_utf8( $bytes ) ? 'valid' : 'invalid' ); +}'''))), + ('Scrubbing invalid bytes with U+FFFD', + '

Replace each ill-formed sequence with the Unicode replacement character. Useful right before serializing to XML, JSON, or sending to an LLM that will choke on broken bytes.

', + ('scrub.php', php('''use function WordPress\\Encoding\\wp_scrub_utf8; + +$broken = "the byte \\xC0 should not be here."; +echo wp_scrub_utf8( $broken ) . "\\n"; + +echo wp_scrub_utf8( ".\\xE2\\x8C\\xE2\\x8C." ) . "\\n";'''))), + ('Detecting noncharacters MySQL/utf8mb4 will reject', + '

Code points like U+FFFE, U+FFFF, and the U+FDD0–U+FDEF block are valid Unicode code points but are reserved as noncharacters. XML 1.0 forbids U+FFFE and U+FFFF and discourages the rest, and some databases reject them. Check before inserting user-submitted content into a strict utf8mb4 column.

', + ('noncharacters.php', php('''use function WordPress\\Encoding\\wp_has_noncharacters; + +$samples = array( +\t'normal text' => 'normal text', +\t'U+FFFE' => "oops \\u{FFFE}", +\t'U+FDD0' => "hi \\u{FDD0} bye", +); + +foreach ( $samples as $label => $text ) { +\techo sprintf( "%-12s %s\\n", $label . ':', wp_has_noncharacters( $text ) ? 'reject' : 'ok' ); +}'''))), + ('Three-way pipeline: validate, scrub, then check noncharacters', + '

Real-world inputs are messy: an old WXR export, a CSV with mixed encodings, a paste from Word. Combining validation, scrubbing, and a noncharacter check covers the three classes of breakage that bite later.

', + ('pipeline.php', php('''use function WordPress\\Encoding\\wp_is_valid_utf8; +use function WordPress\\Encoding\\wp_scrub_utf8; +use function WordPress\\Encoding\\wp_has_noncharacters; + +$inputs = array( +\t'good' => 'Café', +\t'latin1' => "caf\\xE9", +\t'overlong' => "x\\xC1\\xBFy", +\t'noncharac' => "hi \\u{FFFE} there", +); + +foreach ( $inputs as $label => $bytes ) { +\t$valid = wp_is_valid_utf8( $bytes ); +\t$cleaned = wp_scrub_utf8( $bytes ); +\t$weird = wp_has_noncharacters( $cleaned ); +\techo sprintf( "%-10s valid=%s noncharacter=%s -> %s\\n", $label, $valid ? 'Y' : 'N', $weird ? 'Y' : 'N', $cleaned ); +}'''))), + ('Salvaging a legacy ISO-8859-1 column inside a UTF-8 corpus', + '

Old WordPress databases sometimes mix encodings: most rows are UTF-8 but a few were stored as latin-1. Detect the bad rows with wp_is_valid_utf8() and only re-encode those.

', + ('mixed-encoding.php', php('''use function WordPress\\Encoding\\wp_is_valid_utf8; +use function WordPress\\Encoding\\wp_scrub_utf8; + +$rows = array( +\t1 => 'Plain ASCII', +\t2 => 'Café', +\t3 => "caf\\xE9", +\t4 => "weird \\xC0 byte", +); + +foreach ( $rows as $id => $value ) { +\tif ( wp_is_valid_utf8( $value ) ) { +\t\techo "#$id ok: $value\\n"; +\t\tcontinue; +\t} +\t$converted = @iconv( 'ISO-8859-1', 'UTF-8', $value ); +\tif ( false !== $converted && wp_is_valid_utf8( $converted ) ) { +\t\techo "#$id recovered as latin1: $converted\\n"; +\t} else { +\t\techo "#$id unrecoverable, scrubbing: " . wp_scrub_utf8( $value ) . "\\n"; +\t} +}'''))), + ])) + +# =========================================================================== +# DataLiberation +# =========================================================================== +COMPONENTS.append(('dataliberation', 'DataLiberation', + 'Streaming WordPress import/export. WXR, SQL, block markup — without loading whole datasets into memory.', + 'wp-php-toolkit/data-liberation', + [ + ('Why this exists', + '

WordPress content should be portable, but real migrations cross several formats. A site export might arrive as WXR, a Markdown folder, or entities from another CMS. URLs can hide in block attributes, HTML, CSS, feeds, GUIDs, and post meta. Importers must also resume after a failed media download or upload.

' + '

The DataLiberation component streams WordPress-shaped data through readers, transformers, and writers. It models posts, terms, comments, attachments, and metadata as ImportEntity objects, then lets a pipeline rewrite each entity without loading the full export into memory.

' + '

The API reflects specific migration bugs: relative URLs in known block attributes, URLs inside inline CSS, self-closing block comments that must keep their shape, and origin-only URLs whose trailing slash style should not change during a rewrite.

' + '

Reach for it when the job combines formats: build WXR from another CMS, rewrite a staging export for production, frontload remote assets, or compose Markdown, XML, HTML, CSS, and URL rewriting into one pipeline.

', + None), + ('Write a WXR file in five lines', + '

Stream a single post into a WXR document via WXRWriter. The writer holds no buffer beyond what is needed to close currently-open tags, so memory stays flat regardless of input size.

', + ('wxr-quickstart.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\DataLiberation\\EntityWriter\\WXRWriter; +use WordPress\\DataLiberation\\ImportEntity; + +$pipe = new MemoryPipe(); +$writer = new WXRWriter( $pipe ); +$writer->append_entity( new ImportEntity( 'post', array( +\t'post_title' => 'Hello', +\t'content' => 'World.', +\t'post_id' => '1', +\t'status' => 'publish', +) ) ); +$writer->finalize(); +$writer->close_writing(); +$pipe->close_writing(); +$wxr = $pipe->consume_all(); + +echo "bytes: " . strlen( $wxr ) . "\\n"; +echo false !== strpos( $wxr, 'Hello' ) ? "title exported\\n" : "title missing\\n"; +echo false !== strpos( $wxr, 'publish' ) ? "status exported\\n" : "status missing\\n";'''))), + ('Build a WXR programmatically from any source', + '

The writer doesn\'t care where entities come from. Loop over rows from a CMS, a CSV, or a Notion API dump and emit posts plus their meta and comments.

', + ('build-wxr.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\DataLiberation\\EntityWriter\\WXRWriter; +use WordPress\\DataLiberation\\ImportEntity; + +$rows = array( +\tarray( 'id' => 10, 'title' => 'About', 'body' => '

About us.

', 'tags' => array( 'company' ) ), +\tarray( 'id' => 11, 'title' => 'Blog', 'body' => '

Hello world.

', 'tags' => array( 'news', 'launch' ) ), +); + +$pipe = new MemoryPipe(); +$writer = new WXRWriter( $pipe ); + +foreach ( $rows as $row ) { +\t$writer->append_entity( new ImportEntity( 'post', array( +\t\t'post_id' => (string) $row['id'], +\t\t'post_title' => $row['title'], +\t\t'content' => $row['body'], +\t\t'status' => 'publish', +\t\t'post_type' => 'post', +\t) ) ); +\tforeach ( $row['tags'] as $i => $tag ) { +\t\t$writer->append_entity( new ImportEntity( 'term', array( +\t\t\t'term_id' => (string) ( $row['id'] * 100 + $i ), +\t\t\t'taxonomy' => 'post_tag', +\t\t\t'slug' => $tag, +\t\t\t'parent' => '0', +\t\t) ) ); +\t} +} + +$writer->finalize(); +$writer->close_writing(); +$pipe->close_writing(); + +$wxr = $pipe->consume_all(); +echo "items: " . substr_count( $wxr, '' ) . "\\n"; +echo "terms: " . substr_count( $wxr, '' ) . "\\n"; +echo false !== strpos( $wxr, 'Blog' ) ? "Blog post exported\\n" : "Blog post missing\\n";'''))), + ('Read entities from a WXR file with constant memory', + '

WXREntityReader emits one entity at a time. A 10 GB WXR uses the same memory as a 10 KB one.

', + ('wxr-read.php', php('''use WordPress\\DataLiberation\\EntityReader\\WXREntityReader; + +$wxr = << + + +Demo +First1postBody 1 +Second2postBody 2 + + +XML; + +$reader = WXREntityReader::create(); +$reader->append_bytes( $wxr ); +$reader->input_finished(); + +while ( $reader->next_entity() ) { +\t$entity = $reader->get_entity(); +\techo $entity->get_type() . ': ' . json_encode( $entity->get_data() ) . "\\n"; +}'''))), + ('Streaming transform: rewrite URLs while copying WXR', + '

Wire reader to writer to rewrite a WXR file on the fly. This pattern is how you migrate a staging export to production: swap staging.example.com for example.com without ever loading the file into memory.

', + ('rewrite-urls.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\DataLiberation\\EntityReader\\WXREntityReader; +use WordPress\\DataLiberation\\EntityWriter\\WXRWriter; +use WordPress\\DataLiberation\\ImportEntity; + +$source_xml = << + + +Hello1post +Visit https://staging.example.com/about for more. + + +XML; + +$reader = WXREntityReader::create(); +$reader->append_bytes( $source_xml ); +$reader->input_finished(); + +$out_pipe = new MemoryPipe(); +$writer = new WXRWriter( $out_pipe ); + +while ( $reader->next_entity() ) { +\t$entity = $reader->get_entity(); +\t$data = $entity->get_data(); +\tforeach ( array( 'post_content', 'content', 'description' ) as $field ) { +\t\tif ( isset( $data[ $field ] ) ) { +\t\t\t$data[ $field ] = str_replace( 'staging.example.com', 'example.com', $data[ $field ] ); +\t\t} +\t} +\tif ( 'post' === $entity->get_type() ) { +\t\t$data['content'] = isset( $data['post_content'] ) ? $data['post_content'] : ( isset( $data['content'] ) ? $data['content'] : '' ); +\t} +\t$writer->append_entity( new ImportEntity( $entity->get_type(), $data ) ); +} + +$writer->finalize(); +$writer->close_writing(); +$out_pipe->close_writing(); + +$wxr = $out_pipe->consume_all(); +echo false !== strpos( $wxr, 'https://example.com/about' ) ? "new URL present\\n" : "new URL missing\\n"; +echo false === strpos( $wxr, 'staging.example.com' ) ? "old URL removed\\n" : "old URL still present\\n";'''))), + ('Render Markdown into a WXR import in one pipeline', + '

Compose MarkdownConsumer with WXRWriter to publish a folder of Markdown directly as a WordPress import file.

', + ('md-to-wxr.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\DataLiberation\\EntityWriter\\WXRWriter; +use WordPress\\DataLiberation\\ImportEntity; +use WordPress\\Markdown\\MarkdownConsumer; + +@mkdir( '/tmp/md-src', 0777, true ); +file_put_contents( '/tmp/md-src/hello.md', "---\\ntitle: Hello\\n---\\n\\n# Hello\\n\\nFirst post." ); +file_put_contents( '/tmp/md-src/second.md', "---\\ntitle: Second\\n---\\n\\nMore text **here**." ); + +$pipe = new MemoryPipe(); +$writer = new WXRWriter( $pipe ); + +$id = 1; +foreach ( glob( '/tmp/md-src/*.md' ) as $path ) { +\t$consumer = new MarkdownConsumer( file_get_contents( $path ) ); +\t$consumer->consume(); +\t$writer->append_entity( new ImportEntity( 'post', array( +\t\t'post_id' => (string) $id++, +\t\t'post_title' => $consumer->get_meta_value( 'title' ) ?: basename( $path, '.md' ), +\t\t'content' => $consumer->get_block_markup(), +\t\t'status' => 'publish', +\t\t'post_type' => 'post', +\t\t'post_name' => basename( $path, '.md' ), +\t) ) ); +} + +$writer->finalize(); +$writer->close_writing(); +$pipe->close_writing(); + +$wxr = $pipe->consume_all(); +echo "posts: " . substr_count( $wxr, '' ) . "\\n"; +echo false !== strpos( $wxr, '<!-- wp:heading' ) ? "block markup exported\\n" : "block markup missing\\n"; +echo false !== strpos( $wxr, 'Second' ) ? "frontmatter title exported\\n" : "frontmatter title missing\\n";'''))), + ])) + +# =========================================================================== +# Git +# =========================================================================== +COMPONENTS.append(('git', 'Git', + 'A pure-PHP Git client and server. Commits, branches, diffs, HTTP push/pull — all without shelling out to git.', + 'wp-php-toolkit/git', + [ + ('Why this exists', + '

Git is a useful storage model even when a server cannot run the git binary: snapshots, branches, object-addressed files, diffs, merges, and sync over HTTP. That matters for WordPress tools that want revision history for generated files, content snapshots, site state, or collaborative edits in constrained runtimes.

' + '

The Git component implements the core repository operations in PHP and stores objects through the toolkit Filesystem interface. That means the same repository can live on disk, in memory, or in another backend, and higher-level code can commit files without knowing where objects are stored.

' + '

The docs start with simple commits because that mental model scales: a repository is just objects plus refs. From there, branches, history walking, root commits, and merges become details you can reason about instead of magic shell behavior.

' + '

Choose it for tests, browser-like sandboxes, hosted WordPress environments, and applications that need Git behavior through PHP APIs instead of shell commands.

', + None), + ('Commit files into an in-memory repo', + '

The simplest possible repository: an InMemoryFilesystem as object storage and one commit() call. Reach for this in tests, in WP-CLI snapshots, or any place you want versioning without touching disk.

', + ('commit-in-memory.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); + +$oid = $repo->commit( array( +\t'updates' => array( +\t\t'README.md' => "# My Project\\n", +\t\t'src/hello-world.php' => 'get_branch_tip( 'HEAD' ) . "\\n"; +echo "README: " . $repo->read_object_by_path( '/README.md' )->consume_all();'''))), + ('Walk the commit history', + '

Follow the parent chain from HEAD backwards. This is the building block for a WP-CLI "post revisions" log or a "what changed since release X" report.

', + ('walk-history.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitRepository; +use WordPress\\Git\\Model\\Commit; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +foreach ( array( 'add intro', 'fix typo', 'expand examples' ) as $i => $msg ) { +\t$repo->commit( array( +\t\t'updates' => array( 'post.md' => "# Draft {$i}" ), +\t\t'commit' => array( 'message' => $msg ), +\t) ); +} + +$oid = $repo->get_branch_tip( 'HEAD' ); +while ( ! Commit::is_null_hash( $oid ) ) { +\t$c = $repo->read_object( $oid )->as_commit(); +\techo substr( $c->hash, 0, 7 ) . ' ' . trim( $c->message ) . "\\n"; +\t$oid = $c->get_first_parent_hash(); +\tif ( ! $oid || ! $repo->has_object( $oid ) ) break; +}'''))), + ('Treat a repository like a filesystem', + '

GitFilesystem wraps a repository in this toolkit\'s Filesystem interface. With the default options, each put_contents() records a new commit.

', + ('git-filesystem.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitFilesystem; +use WordPress\\Git\\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$fs = GitFilesystem::create( $repo ); + +$fs->put_contents( '/posts/hello.md', "# Hello\\nFirst draft." ); +$fs->put_contents( '/posts/about.md', "# About\\nWho we are." ); +$fs->put_contents( '/posts/hello.md', "# Hello\\nSecond draft." ); + +echo "tree:\\n"; +foreach ( $fs->ls( '/posts' ) as $name ) { +\techo " /posts/{$name}\\n"; +} +echo "\\nhello.md now:\\n" . $fs->get_contents( '/posts/hello.md' ) . "\\n";'''))), + ('Branch, edit, and switch back', + '

Create a feature branch off the current commit, change files, flip HEAD back. Useful for experimental edits in collaborative tools.

', + ('branches.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$base = $repo->commit( array( +\t'updates' => array( 'config.json' => '{"flag":false}' ), +\t'commit' => array( 'message' => 'baseline' ), +) ); + +$repo->create_branch( 'refs/heads/experiment', $base ); +$repo->checkout( 'refs/heads/experiment' ); +$repo->commit( array( +\t'updates' => array( 'config.json' => '{"flag":true}' ), +\t'commit' => array( 'message' => 'flip the flag' ), +) ); + +echo "on experiment: " . $repo->read_object_by_path( '/config.json' )->consume_all() . "\\n"; + +$repo->checkout( 'refs/heads/trunk' ); +echo "on trunk: " . $repo->read_object_by_path( '/config.json' )->consume_all() . "\\n";'''))), + ('Three-way merge two branches', + '

The classic Git workflow: branch off, edit on each side, merge. $repo->merge() finds the common ancestor, three-way-merges every file, and creates a merge commit.

', + ('merge-branches.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); +$base = $repo->commit( array( 'updates' => array( +\t'todo.txt' => "buy milk\\nwalk dog\\nread book\\n", +) ) ); + +$repo->commit( array( 'updates' => array( +\t'todo.txt' => "buy oat milk\\nwalk dog\\nread book\\n", +) ) ); + +$repo->create_branch( 'refs/heads/feature', $base ); +$repo->checkout( 'refs/heads/feature' ); +$repo->commit( array( 'updates' => array( +\t'todo.txt' => "buy milk\\nwalk dog\\nread book\\nwrite blog post\\n", +) ) ); + +$repo->checkout( 'refs/heads/trunk' ); +$result = $repo->merge( 'refs/heads/feature' ); + +echo "merge head: {$result['new_head']}\\n"; +echo "conflicts: " . ( $result['conflicts'] ? implode( ',', $result['conflicts'] ) : 'none' ) . "\\n"; +echo "result:\\n" . $repo->read_object_by_path( '/todo.txt' )->consume_all();'''))), + ('Snapshot WordPress options into a repo', + '

Serialize a chunk of WP state (options, post meta, a theme config) on every save and commit it. You get free history, diffs between snapshots, and a "rollback to last week" button.

', + ('options-snapshot.php', php('''use WordPress\\Filesystem\\InMemoryFilesystem; +use WordPress\\Git\\GitRepository; + +$repo = new GitRepository( InMemoryFilesystem::create() ); + +$snapshots = array( +\tarray( 'blogname' => 'My Site', 'posts_per_page' => 10, 'timezone_string' => 'UTC' ), +\tarray( 'blogname' => 'My Site', 'posts_per_page' => 20, 'timezone_string' => 'UTC' ), +\tarray( 'blogname' => 'New Name', 'posts_per_page' => 20, 'timezone_string' => 'Europe/Warsaw' ), +); + +foreach ( $snapshots as $i => $options ) { +\t$repo->commit( array( +\t\t'updates' => array( 'options.json' => json_encode( $options, JSON_PRETTY_PRINT ) ), +\t\t'commit' => array( 'message' => "snapshot #{$i}" ), +\t) ); +} + +$head = $repo->get_branch_tip( 'HEAD' ); +$parent = $repo->read_object( $head )->as_commit()->get_first_parent_hash(); +$diff = $repo->diff_commits( $head, $parent ); + +echo "Files changed in last snapshot:\\n"; +foreach ( $diff as $name => $entry ) { +\techo " {$name}\\n"; +}'''))), + ])) + +# =========================================================================== +# Merge +# =========================================================================== +COMPONENTS.append(('merge', 'Merge', + 'Three-way merge and diff. Pluggable differ + merger + optional validator.', + 'wp-php-toolkit/merge', + [ + ('Why this exists', + '

Content synchronization needs more than "last write wins." A Markdown file changes in Git while the same post changes in WordPress. A generated config changes through both a CLI tool and a UI. In those cases you need a common ancestor, two edited versions, and a way to explain conflicts to a human.

' + '

The Merge component provides the diff and three-way merge primitives used by those workflows. The default examples are line-oriented because that is the most familiar shape, but the strategy is intentionally pluggable: choose the differ, choose the merger, and optionally validate the merged result before accepting it.

' + '

Use the merge result to auto-accept independent edits and to show structured conflicts when a person must decide.

', + None), + ('Diff two strings line by line', + '

Feed two strings to LineDiffer and inspect the operations. Every get_changes() entry is a [op, text] pair.

', + ('line-diff.php', php('''use WordPress\\Merge\\Diff\\Diff; +use WordPress\\Merge\\Diff\\LineDiffer; + +$diff = ( new LineDiffer() )->diff( +\t"alpha\\nbeta\\ngamma\\n", +\t"alpha\\nBETA\\ngamma\\ndelta\\n" +); + +$labels = array( Diff::DIFF_EQUAL => '=', Diff::DIFF_DELETE => '-', Diff::DIFF_INSERT => '+' ); +foreach ( $diff->get_changes() as $change ) { +\techo $labels[ $change[0] ] . ' ' . rtrim( $change[1] ) . "\\n"; +}'''))), + ('Render a unified patch', + '

format_as_git_patch() produces output that mirrors git diff, including hunk headers — handy for emails, CI annotations, or a "what changed?" panel.

', + ('git-patch.php', php('''use WordPress\\Merge\\Diff\\LineDiffer; + +$old = "title: Hello\\nauthor: Alice\\nstatus: draft\\n"; +$new = "title: Hello, world\\nauthor: Alice\\nstatus: published\\ntags: greeting\\n"; + +$diff = ( new LineDiffer() )->diff( $old, $new ); +echo $diff->format_as_git_patch( array( +\t'a_source' => 'a/post.yml', +\t'b_source' => 'b/post.yml', +) );'''))), + ('Three-way merge with no conflicts', + '

The classic case: each branch changes a different region. Pass the common ancestor plus both edits to MergeStrategy::merge() and read the merged result.

', + ('three-way.php', php('''use WordPress\\Merge\\Diff\\LineDiffer; +use WordPress\\Merge\\Merge\\LineMerger; +use WordPress\\Merge\\MergeStrategy; + +$strategy = new MergeStrategy( new LineDiffer(), new LineMerger() ); + +$result = $strategy->merge( +\t"intro\\nbody\\noutro\\n", +\t"intro updated\\nbody\\noutro\\n", +\t"intro\\nbody\\noutro\\nappendix\\n" +); + +echo $result->has_conflicts() ? "conflicts!\\n" : "clean merge:\\n"; +echo $result->get_merged_content();'''))), + ('Inspect and surface conflicts', + '

When both sides edit the same region, the merger produces a MergeConflict. The merged content carries Git-style markers, but the structured get_conflicts() output is what you want for a UI that lets the user pick a side.

', + ('conflicts.php', php('''use WordPress\\Merge\\Diff\\LineDiffer; +use WordPress\\Merge\\Merge\\LineMerger; +use WordPress\\Merge\\MergeStrategy; + +$strategy = new MergeStrategy( new LineDiffer(), new LineMerger() ); +$result = $strategy->merge( +\t"line 1\\nline 2\\n", +\t"line 1\\nline 2 from Alice\\n", +\t"line 1\\nline 2 from Bob\\n" +); + +if ( $result->has_conflicts() ) { +\tforeach ( $result->get_conflicts() as $c ) { +\t\techo "ours: " . trim( $c->ours ) . "\\n"; +\t\techo "theirs: " . trim( $c->theirs ) . "\\n"; +\t} +} +echo "\\n--- merged content with markers ---\\n"; +echo $result->get_merged_content();'''))), + ('Sync a Markdown folder against an edited DB copy', + '

A real-world scenario: posts live both in a Git-tracked Markdown folder and in WordPress, and someone edits each. Three-way-merge each post against its common ancestor.

', + ('sync-folder-vs-db.php', php('''use WordPress\\Merge\\Diff\\LineDiffer; +use WordPress\\Merge\\Merge\\LineMerger; +use WordPress\\Merge\\MergeStrategy; + +$strategy = new MergeStrategy( new LineDiffer(), new LineMerger() ); + +$posts = array( +\t'hello.md' => array( +\t\t'base' => "# Hello\\nDraft body.\\n", +\t\t'disk' => "# Hello\\nDraft body, expanded on disk.\\n", +\t\t'db' => "# Hello\\nDraft body.\\nNew section from the editor.\\n", +\t), +\t'about.md' => array( +\t\t'base' => "# About\\nWho we are.\\n", +\t\t'disk' => "# About\\nWho *they* are.\\n", +\t\t'db' => "# About\\nWho we really are.\\n", +\t), +); + +foreach ( $posts as $name => $sides ) { +\t$result = $strategy->merge( $sides['base'], $sides['disk'], $sides['db'] ); +\techo "=== {$name} ===\\n"; +\techo $result->has_conflicts() ? "(conflict — needs review)\\n" : "(auto-merged)\\n"; +\techo $result->get_merged_content() . "\\n"; +}'''))), + ])) + +# =========================================================================== +# HttpClient +# =========================================================================== +COMPONENTS.append(('httpclient', 'HttpClient', + 'Async HTTP client without curl required. Uses sockets when curl is missing, supports concurrent requests and streaming responses.', + 'wp-php-toolkit/http-client', + [ + ('Why this exists', + '

A plugin installer starts with one request to download plugin.zip. A migration then adds progress reporting, a ten-request media window, resumable downloads, and a remote ZIP reader that feeds ZipFilesystem directly. Those workflows need the same request API from the first GET to the final streamed archive.

' + '

The HttpClient component gives the toolkit a small request/response model, middleware for redirects and caching, concurrent fetches, and response bodies exposed as byte streams. It runs through curl when PHP provides curl and through pure PHP sockets when it does not. Callers keep the same code path.

' + '

Use it to fetch plugin metadata, submit import callbacks, mirror a media library, read a WXR export, or pipe a remote archive into Zip and Filesystem code.

', + None), + ('GET a URL', + '

Network access in the demo runtime. Live request examples show the real API, but outbound HTTP in browser sandboxes may require a CORS proxy.

' + '

The smallest flow has three steps: create a request, wait until headers arrive, then consume the body stream. This is intentionally close to the Fetch API shape, but the body is a toolkit byte stream instead of a buffered string.

', + ('get.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$client = new Client(); +$stream = $client->fetch( new Request( 'https://example.com/' ) ); + +$response = $stream->await_response(); +echo "status: " . $response->status_code . "\\n"; +echo "first 80 bytes: " . substr( $stream->consume_all(), 0, 80 ) . "\\n";'''))), + ('POST to a URL', + '

Uploads use the same shape. The only difference is that the request declares a method, request headers, and an upload body stream. Here the body is form-encoded text wrapped in MemoryPipe; a file upload could provide a file-backed read stream instead.

', + ('post.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; +use WordPress\\ByteStream\\MemoryPipe; + +$payload = http_build_query( +\tarray( +\t\t'title' => 'Hello', +\t\t'tags' => 'http,php', +\t), +\t'', +\t'&' +); + +$client = new Client(); +$request = new Request( 'https://httpbin.org/post', array( +\t'method' => 'POST', +\t'headers' => array( +\t\t'content-type' => 'application/x-www-form-urlencoded', +\t\t'content-length' => (string) strlen( $payload ), +\t), +\t'body_stream' => new MemoryPipe( $payload ), +) ); + +$response = $client->fetch( $request )->json(); +echo "Server saw form title: " . $response['form']['title'] . "\\n";'''))), + ('Build a JSON request object', + '

A Request is just data until a client enqueues it. That makes it easy to test request construction without network access. The constructor normalizes headers, calculates content-length when the body stream has a known length, and moves URL credentials into an Authorization header.

', + ('request-object.php', php('''use WordPress\\ByteStream\\MemoryPipe; +use WordPress\\HttpClient\\Request; + +$body = new MemoryPipe( json_encode( array( +\t'title' => 'Hello', +\t'tags' => array( 'docs', 'php' ), +) ) ); +$body->close_writing(); + +$request = new Request( 'https://user:secret@api.example.test/posts', array( +\t'method' => 'POST', +\t'headers' => array( 'content-type' => 'application/json' ), +\t'body_stream' => $body, +) ); + +echo $request->method . ' ' . $request->url . "\\n"; +echo "content-type: " . $request->get_header( 'content-type' ) . "\\n"; +echo "content-length: " . $request->get_header( 'content-length' ) . "\\n"; +echo "authorization: " . substr( $request->get_header( 'authorization' ), 0, 10 ) . "...\\n";'''))), + ('Parse response headers', + '

Most applications receive Response objects from await_response(). Transports, middleware, and tests sometimes need the lower-level parser: Response::from_http_headers() turns raw HTTP header bytes into a Response with a normalized status code and case-insensitive headers.

', + ('parse-response.php', php('''use WordPress\\HttpClient\\Request; +use WordPress\\HttpClient\\Response; + +$request = new Request( 'https://api.example.test/posts/42' ); +$raw = "HTTP/1.1 201 Created\\r\\n" +\t. "Content-Type: application/json\\r\\n" +\t. "Location: /posts/42\\r\\n" +\t. "Content-Length: 27\\r\\n\\r\\n"; + +$response = Response::from_http_headers( $raw, $request ); + +echo "status: " . $response->status_code . ' ' . $response->get_reason_phrase() . "\\n"; +echo "ok: " . ( $response->ok() ? 'yes' : 'no' ) . "\\n"; +echo "type: " . $response->get_header( 'CONTENT-TYPE' ) . "\\n"; +echo "size: " . $response->total_bytes . " bytes\\n";'''))), + ('Pick the right reading style', + '

There are three common ways to consume a response. Start simple, then move down the table only when the workflow demands it.

' + '' + '' + '' + '' + '
StyleUse whenTradeoff
consume_all() or json()Small HTML, JSON, or API responses.Buffers the full body.
Client::await_next_event()Progress bars, streaming to disk, queues, failure handling.You own the event loop.
Filesystem and parser compositionRemote ZIPs, WXR files, import pipelines.Requires a stream-aware consumer.
', + None), + ('Choose a transport', + '

The transport is the I/O backend. It should not change your request, response, redirect, cache, or stream code; it only changes how bytes move across the network.

' + '' + '' + '' + '' + '
TransportWhat it doesWhen to choose it
autoUses curl when loaded, otherwise sockets.Application default. Best when you want portability and the fastest available backend.
socketsUses PHP stream sockets, no curl extension.Tests, Playground-style runtimes, hosts where curl is unavailable, or proving the dependency-free path works.
curlUses the curl extension.Hosts where curl is available and you want to compare behavior or performance explicitly.
' + '

concurrency, timeout_ms, cache_dir, redirects, and response streaming sit above the transport, so the examples later on work with either backend.

', + ('transports.php', php('''use WordPress\\HttpClient\\Client; + +$default = new Client(); // Same as array( 'transport' => 'auto' ). + +$portable = new Client( array( +\t'transport' => 'sockets', +) ); + +if ( extension_loaded( 'curl' ) ) { +\t$curl = new Client( array( +\t\t'transport' => 'curl', +\t) ); +}'''), False)), + ('Follow redirects and inspect the final request', + '

Redirects are middleware, not transport behavior. The client follows up to five redirects by default. The original Request keeps a chain to the final request, so importers can log where a source URL actually landed.

', + ('redirects.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$client = new Client(); +$request = new Request( 'https://httpbin.org/redirect-to?url=https://example.com/' ); +$stream = $client->fetch( $request ); +$response = $stream->await_response(); +$stream->consume_all(); + +$final = $request->latest_redirect(); +echo "original: " . $request->url . "\\n"; +echo "final: " . $final->url . "\\n"; +echo "status: " . $response->status_code . "\\n";'''), False)), + ('Cache repeatable GET responses', + '

Pass cache_dir to add disk caching for cacheable GET and HEAD responses. Fresh cached responses replay the same header/body events as a network response, so crawlers and importers do not need a separate cache code path. Requests with other methods invalidate matching cache entries instead of being cached.

', + ('cache.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$cache_dir = sys_get_temp_dir() . '/http-cache-' . uniqid(); +mkdir( $cache_dir ); + +$client = new Client( array( 'cache_dir' => $cache_dir ) ); +$url = 'https://httpbin.org/cache/60'; + +for ( $i = 1; $i <= 2; $i++ ) { +\t$stream = $client->fetch( new Request( $url ) ); +\t$response = $stream->await_response(); +\t$body = $stream->consume_all(); +\techo "request {$i}: HTTP " . $response->status_code . ', body=' . strlen( $body ) . " bytes\\n"; +} + +echo "cache files: " . count( glob( $cache_dir . '/*' ) ) . "\\n";'''), False)), + ('Handle failures without losing the queue', + '

Failures arrive as events. That lets a crawler, importer, package installer, or media frontloader log one bad URL and keep processing the rest of the queue. Treat failure handling as part of the event loop, not as one global try/catch around the whole batch.

', + ('failures.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$client = new Client( array( 'timeout_ms' => 5000 ) ); +$client->enqueue( array( +\tnew Request( 'https://example.com/', array( 'method' => 'HEAD' ) ), +\tnew Request( 'https://example.invalid/missing' ), +) ); + +while ( $client->await_next_event() ) { +\t$request = $client->get_request(); +\t$event = $client->get_event(); + +\tif ( Client::EVENT_GOT_HEADERS === $event ) { +\t\techo "ok: " . $request->url . " HTTP " . $request->response->status_code . "\\n"; +\t} elseif ( Client::EVENT_FAILED === $event ) { +\t\techo "failed: " . $request->url . "\\n"; +\t} elseif ( Client::EVENT_FINISHED === $event ) { +\t\techo "finished: " . $request->url . "\\n"; +\t} +}'''), False)), + ('Monitor download progress', + '

When you care about progress, use the event loop directly. Count bytes from each EVENT_BODY_CHUNK_AVAILABLE event and compare them with Content-Length when the server provides one.

', + ('progress.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$url = 'https://raw.githubusercontent.com/WordPress/php-toolkit/trunk/components/Zip/Tests/fixtures/childrens-literature.zip'; +$dest = sys_get_temp_dir() . '/progress-' . uniqid() . '.zip'; + +$client = new Client(); +$request = new Request( $url ); +$client->enqueue( array( $request ) ); + +$downloaded = 0; +$last_step = -1; +@unlink( $dest ); + +while ( $client->await_next_event() ) { +\t$event = $client->get_event(); +\t$request = $client->get_request(); + +\tif ( Client::EVENT_GOT_HEADERS === $event ) { +\t\techo "status: " . $request->response->status_code . "\\n"; +\t\tcontinue; +\t} + +\tif ( Client::EVENT_BODY_CHUNK_AVAILABLE === $event ) { +\t\t$chunk = $client->get_response_body_chunk(); +\t\t$downloaded += strlen( $chunk ); +\t\tfile_put_contents( $dest, $chunk, FILE_APPEND ); + +\t\t$total = $request->response->total_bytes; +\t\tif ( $total ) { +\t\t\t$step = min( 100, (int) floor( $downloaded / $total * 100 ) ); +\t\t\tif ( $step >= $last_step + 25 || 100 === $step ) { +\t\t\t\techo "progress: {$step}% ({$downloaded}/{$total} bytes)\\n"; +\t\t\t\t$last_step = $step; +\t\t\t} +\t\t} else { +\t\t\techo "downloaded: {$downloaded} bytes\\n"; +\t\t} +\t\tcontinue; +\t} + +\tif ( Client::EVENT_FINISHED === $event ) { +\t\techo "saved: {$dest}\\n"; +\t} elseif ( Client::EVENT_FAILED === $event ) { +\t\techo "failed: " . $request->error->message . "\\n"; +\t} +}'''))), + ('Keep a sliding window of 10 requests', + '

For large queues, do not enqueue everything at once. Keep at most ten active requests, enqueue another as each one finishes, and let the client multiplex only that window.

', + ('sliding-window.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$urls = array(); +for ( $i = 1; $i <= 25; $i++ ) { +\t$urls[] = 'https://example.com/?request=' . $i; +} + +$client = new Client( array( 'concurrency' => 10 ) ); +$pending = $urls; +$active = array(); +$done = 0; + +$enqueue_next = function () use ( &$pending, &$active, $client ) { +\tif ( ! $pending ) { +\t\treturn; +\t} +\t$url = array_shift( $pending ); +\t$request = new Request( $url, array( 'method' => 'HEAD' ) ); +\t$active[ $request->id ] = $request; +\t$client->enqueue( array( $request ) ); +}; + +for ( $i = 0; $i < 10; $i++ ) { +\t$enqueue_next(); +} + +while ( $active && $client->await_next_event() ) { +\t$request = $client->get_request(); +\t$event = $client->get_event(); + +\tif ( Client::EVENT_GOT_HEADERS === $event ) { +\t\techo "headers {$request->id}: " . $request->response->status_code . "\\n"; +\t\tcontinue; +\t} + +\tif ( Client::EVENT_FINISHED === $event || Client::EVENT_FAILED === $event ) { +\t\tunset( $active[ $request->id ] ); +\t\t$done++; +\t\techo "finished {$done}/25, active=" . count( $active ) . "\\n"; +\t\t$enqueue_next(); +\t} +}'''))), + ('Resume a partial download', + '

Resuming is an HTTP contract between you and the server. Save what you already have, send a Range request for the remaining bytes, and append only if the server returns 206 Partial Content.

', + ('resume-download.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$url = 'https://raw.githubusercontent.com/WordPress/php-toolkit/trunk/components/Zip/Tests/fixtures/childrens-literature.zip'; +$dest = sys_get_temp_dir() . '/resume-' . uniqid() . '.zip'; + +$client = new Client(); + +// Simulate an interrupted first attempt by downloading only the first 32 KB. +$first = new Request( $url, array( +\t'headers' => array( 'range' => 'bytes=0-32767' ), +) ); +$stream = $client->fetch( $first ); +$response = $stream->await_response(); +file_put_contents( $dest, $stream->consume_all() ); + +if ( 206 !== $response->status_code ) { +\techo "Server did not honor Range; start over with a full download.\\n"; +\texit; +} + +$downloaded = filesize( $dest ); +echo "partial file: {$downloaded} bytes\\n"; + +$resume = new Request( $url, array( +\t'headers' => array( 'range' => 'bytes=' . $downloaded . '-' ), +) ); +$stream = $client->fetch( $resume ); +$response = $stream->await_response(); + +if ( 206 !== $response->status_code ) { +\techo "Server did not resume; discard partial file and retry from byte 0.\\n"; +\texit; +} + +while ( ! $stream->reached_end_of_data() ) { +\t$n = $stream->pull( 8192 ); +\tif ( 0 === $n ) { +\t\tbreak; +\t} +\tfile_put_contents( $dest, $stream->consume( $n ), FILE_APPEND ); +} + +echo "complete file: " . filesize( $dest ) . " bytes\\n"; +echo "saved: {$dest}\\n";'''))), + ('Stream-unzip a remote archive', + '

Mount the remote archive with ZipFilesystem, then copy it into any writable filesystem. SeekableRequestReadStream caches received bytes to a temporary file so ZipFilesystem can read the central directory and seek to entries without first writing the ZIP yourself.

', + ('stream-unzip.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\ByteStream\\SeekableRequestReadStream; +use WordPress\\HttpClient\\Request; +use WordPress\\Filesystem\\LocalFilesystem; +use WordPress\\Zip\\ZipFilesystem; +use function WordPress\\Filesystem\\copy_between_filesystems; +use function WordPress\\Filesystem\\ls_recursive; + +$url = 'https://raw.githubusercontent.com/WordPress/php-toolkit/trunk/components/Zip/Tests/fixtures/childrens-literature.zip'; +$root = sys_get_temp_dir() . '/remote-zip-' . uniqid(); +mkdir( $root ); + +$client = new Client(); +$reader = new SeekableRequestReadStream( +\tnew Request( $url ), +\tarray( 'client' => $client ) +); + +$response = $reader->await_response(); +if ( ! $response->ok() ) { +\techo "HTTP " . $response->status_code . "\\n"; +\texit; +} + +$zip = ZipFilesystem::create( $reader ); +$local = LocalFilesystem::create( $root ); + +copy_between_filesystems( array( +\t'source_filesystem' => $zip, +\t'source_path' => '/', +\t'target_filesystem' => $local, +\t'target_path' => '/', +) ); + +$tree = ls_recursive( $local, '/' ); +$files = 0; +array_walk_recursive( $tree, function ( $value, $key ) use ( &$files ) { +\tif ( 'type' === $key && 'file' === $value ) { +\t\t$files++; +\t} +} ); + +echo "extracted {$files} files\\n"; +echo "root: {$root}\\n";'''))), + ('Parallel fan-out: fetch many URLs at once', + '

Enqueue a batch of requests and react to events as they fire. The client multiplexes them — total wall time is roughly the slowest request, not the sum.

', + ('fan-out.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$urls = array( +\t'https://wordpress.org/', +\t'https://make.wordpress.org/', +\t'https://developer.wordpress.org/', +); + +$client = new Client(); +$client->enqueue( array_map( function ( $url ) { +\treturn new Request( $url, array( 'method' => 'HEAD' ) ); +}, $urls ) ); + +$results = array(); +while ( $client->await_next_event() ) { +\t$request = $client->get_request(); +\tif ( Client::EVENT_GOT_HEADERS === $client->get_event() ) { +\t\t$results[ $request->url ] = $request->response->status_code; +\t} elseif ( Client::EVENT_FAILED === $client->get_event() ) { +\t\t$results[ $request->url ] = 'ERR ' . $request->error->message; +\t} +} + +foreach ( $results as $url => $status ) { +\tprintf( "%-40s %s\\n", $url, $status ); +}'''))), + ('Stream a download to disk without OOM', + '

Process the body chunk-by-chunk via the event loop. Memory stays flat regardless of file size.

', + ('stream-to-disk.php', php('''use WordPress\\HttpClient\\Client; +use WordPress\\HttpClient\\Request; + +$dest = sys_get_temp_dir() . '/wp-readme.html'; +$client = new Client(); +$client->enqueue( array( new Request( 'https://wordpress.org/' ) ) ); + +$bytes = 0; +@unlink( $dest ); + +while ( $client->await_next_event() ) { +\tswitch ( $client->get_event() ) { +\t\tcase Client::EVENT_BODY_CHUNK_AVAILABLE: +\t\t\t$chunk = $client->get_response_body_chunk(); +\t\t\t$bytes += strlen( $chunk ); +\t\t\tfile_put_contents( $dest, $chunk, FILE_APPEND ); +\t\t\tbreak; +\t\tcase Client::EVENT_FINISHED: +\t\t\techo "Wrote {$bytes} bytes to {$dest}\\n"; +\t\t\tbreak; +\t} +} + +echo "Peak memory: " . round( memory_get_peak_usage( true ) / 1024 / 1024, 2 ) . " MB\\n";'''))), + ])) + +# =========================================================================== +# HttpServer +# =========================================================================== +COMPONENTS.append(('httpserver', 'HttpServer', + 'A minimal blocking TCP HTTP server in pure PHP. For CLI tools and tests, not for production traffic.', + 'wp-php-toolkit/http-server', + [ + ('Why this exists', + '

Sometimes a PHP tool needs a tiny local HTTP surface: a test fixture server, a webhook receiver during development, a CLI tool with a browser UI, or a demo endpoint for another component. Pulling in a production web framework would obscure the example and add dependencies the toolkit avoids.

' + '

The HttpServer component is intentionally small: a blocking TCP server, incoming request objects, and response writers. It is useful for local tools and tests. It is not a replacement for nginx, Apache, php-fpm, RoadRunner, Swoole, or a production application server.

', + None), + ('Hello world on port 8080', + '

Run on your machine: the Playground sandbox does not allow processes to bind listening TCP ports. Save this snippet locally and run php hello-server.php.

', + ('hello-server.php', '''set_handler( function ( IncomingRequest $request, ResponseWriteStream $response ) { +\t$response->send_http_code( 200 ); +\t$response->send_header( 'Content-Type', 'text/plain' ); +\t$response->append_bytes( "Hello from " . $request->method . " " . $request->url . "\\n" ); +} ); + +$server->serve( function ( $host, $port ) { +\techo "Listening on http://{$host}:{$port}\\n"; +} );''', False)), + ('A tiny JSON router', + '

Run on your machine: needs a listening port. Once running, try curl localhost:8080/api/status.

' + '

Build a CLI tool with a web UI by switching on the parsed path and method.

', + ('mini-router.php', '''set_handler( function ( IncomingRequest $request, ResponseWriteStream $response ) { +\t$path = $request->get_parsed_url()->pathname; + +\tif ( '/api/status' === $path ) { +\t\t$response->send_http_code( 200 ); +\t\t$response->send_header( 'Content-Type', 'application/json' ); +\t\t$response->append_bytes( json_encode( array( +\t\t\t'ok' => true, +\t\t\t'pid' => getmypid(), +\t\t\t'memory' => memory_get_usage( true ), +\t\t) ) ); +\t\treturn; +\t} + +\tif ( '/api/echo' === $path && 'POST' === $request->method ) { +\t\t$body = ''; +\t\twhile ( ! $request->body_stream->reached_end_of_data() ) { +\t\t\t$n = $request->body_stream->pull( 4096 ); +\t\t\tif ( $n > 0 ) $body .= $request->body_stream->consume( $n ); +\t\t} +\t\t$response->send_http_code( 200 ); +\t\t$response->send_header( 'Content-Type', 'text/plain' ); +\t\t$response->append_bytes( $body ); +\t\treturn; +\t} + +\t$response->send_http_code( 404 ); +\t$response->append_bytes( "Not found\\n" ); +} ); + +$server->serve();''', False)), + ('Buffered response with auto Content-Length', + '

Use BufferingResponseWriter when you want the framework to compute Content-Length for you, or when the runtime is CGI-shaped and expects the full body up front. This one runs anywhere — no socket required.

', + ('buffered-writer.php', php('''use WordPress\\HttpServer\\Response\\BufferingResponseWriter; + +$writer = new BufferingResponseWriter(); +$writer->send_http_code( 200 ); +$writer->send_header( 'Content-Type', 'text/html' ); +$writer->append_bytes( 'Hi

Hello

' ); +$writer->append_bytes( '

Buffered body, sent at the end.

' ); + +ob_start(); +$writer->close_writing(); +$response_body = ob_get_clean(); + +echo "headers before send:\\n"; +foreach ( $writer->get_buffered_headers() as $name => $value ) { +\techo "{$name}: {$value}\\n"; +} +echo "\\nbody:\\n" . $response_body;'''))), + ])) + +# =========================================================================== +# CORSProxy +# =========================================================================== +COMPONENTS.append(('corsproxy', 'CORSProxy', + 'A small PHP CORS proxy intended for browser-side code that needs to reach servers without CORS headers.', + 'wp-php-toolkit/corsproxy', + [ + ('Why this exists', + '

A Playground-style browser tool reads https://api.github.com/repos/WordPress/php-toolkit, a plugin ZIP from downloads.wordpress.org, or a raw fixture from GitHub. The browser blocks the response when the upstream server does not send the required CORS headers, even though PHP can fetch the same public URL server-side.

' + '

The CORSProxy component is that server-side bridge. It accepts a target URL, fetches it from PHP, and returns a browser-readable response. Because an open proxy is a security and abuse risk, real deployments should add host allowlists, rate limits, header controls, and private-network protections appropriate to their environment.

', + None), + ('Run the proxy locally', + '

Run on your machine: the proxy needs to listen on a port. Start PHP\'s built-in server and request any HTTPS URL through it.

' + '
PLAYGROUND_CORS_PROXY_DISABLE_RATE_LIMIT=1 \\\n  php -S 127.0.0.1:5263 vendor/wp-php-toolkit/corsproxy/cors-proxy.php\n\n# In another terminal:\ncurl -s "http://127.0.0.1:5263/cors-proxy.php/https://api.github.com/repos/WordPress/php-toolkit" | head\n
', + None), + ('Production rate limiting', + '

Drop a cors-proxy-config.php next to cors-proxy.php. If that file defines a playground_cors_proxy_maybe_rate_limit() function, the proxy calls it before forwarding any request — your one chance to reject early. Without the file, the proxy applies its default rate limiter, which is fine for development but should be replaced for any deployment that gets real traffic.

' + '

This example uses a per-IP sliding-window log of request timestamps stored on disk. Replace it with Redis or memcached for multi-host deployments.

', + ('cors-proxy-config.php', ''' $now - $window; +\t} ); + +\tif ( count( $hits ) >= $max_req ) { +\t\theader( 'Retry-After: ' . $window ); +\t\thttp_response_code( 429 ); +\t\techo 'Rate limit exceeded'; +\t\texit; +\t} + +\t$hits[] = $now; +\tfile_put_contents( $bucket, json_encode( array_values( $hits ) ) ); +} + +echo "Config loaded — rate limiter armed.\\n";''', False)), + ('Allowlist upstream hosts', + '

Out of the box the proxy will fetch any public URL. Most real deployments want a fixed list of upstreams — GitHub, Packagist, wp.org. Both the rate-limit logic and the allowlist live in the same hook, since cors-proxy.php only calls playground_cors_proxy_maybe_rate_limit() once. The example below shows just the allowlist concern; in practice you stack both in one function inside cors-proxy-config.php.

', + ('cors-proxy-config-allowlist.php', '''Once deployed, the client side is just fetch() with the proxy URL. Drop this into any HTML page.

' + '
const PROXY = "https://cors.example.com/cors-proxy.php";\n\nasync function viaProxy(url, init = {}) {\n  const res = await fetch(`${PROXY}/${url}`, {\n    ...init,\n    headers: {\n      ...(init.headers || {}),\n      "X-Cors-Proxy-Allowed-Request-Headers": "Authorization",\n    },\n  });\n  if (!res.ok) throw new Error(`Proxy returned ${res.status}`);\n  return res;\n}\n\nconst repo = await viaProxy("https://api.github.com/repos/WordPress/php-toolkit").then(r => r.json());\nconsole.log(repo.full_name, repo.stargazers_count);\n
', + None), + ('Deploy behind nginx', + '

The proxy is a single PHP script — any SAPI works. nginx + php-fpm is a common production setup. PATH_INFO is what the proxy reads to learn the target URL.

' + '
server {\n  listen 443 ssl http2;\n  server_name cors.example.com;\n\n  root /var/www/cors-proxy;\n  index cors-proxy.php;\n\n  location ~ ^/cors-proxy\\.php(/.*)?$ {\n    fastcgi_pass unix:/run/php/php8.1-fpm.sock;\n    fastcgi_split_path_info ^(.+\\.php)(/.*)$;\n    fastcgi_param SCRIPT_FILENAME $document_root/cors-proxy.php;\n    fastcgi_param PATH_INFO $fastcgi_path_info;\n    include fastcgi_params;\n  }\n}\n
', + None), + ])) + +# =========================================================================== +# CLI +# =========================================================================== +COMPONENTS.append(('cli', 'CLI', + 'POSIX-style argument parser. Long options, short bundles, inline values, positional args — one static call.', + 'wp-php-toolkit/cli', + [ + ('Why this exists', + '

Real CLI tools in PHP usually mean either pulling in symfony/console (and the transitive dependencies that come with it) or hand-rolling argv parsing that breaks the first time someone writes -vvv or --port=8080. The toolkit\'s CLI class is one static method, no dependencies, and handles the POSIX shapes you actually see.

', + None), + ('Parse a single flag', + '

The smallest useful invocation: one boolean flag, one positional. Each option is a four-tuple of [ short, has_value, default, description ].

', + ('parse-flag.php', php('''use WordPress\\CLI\\CLI; + +$option_defs = array( +\t'verbose' => array( 'v', false, false, 'Enable verbose output' ), +); + +list( $positionals, $options ) = CLI::parse_command_args_and_options( +\tarray( '-v', 'input.txt' ), +\t$option_defs +); + +echo "verbose: " . ( $options['verbose'] ? 'yes' : 'no' ) . "\\n"; +echo "input: " . $positionals[0] . "\\n";'''))), + ('Mix values, flags, and bundles', + '

The parser accepts --port 8080, --port=8080, -p 8080, and -p=8080. It also expands bundled boolean shorts such as -afv.

', + ('mix-shapes.php', php('''use WordPress\\CLI\\CLI; + +$option_defs = array( +\t'all' => array( 'a', false, false, 'Process everything' ), +\t'force' => array( 'f', false, false, 'Overwrite existing files' ), +\t'verbose' => array( 'v', false, false, 'Verbose output' ), +\t'output' => array( 'o', true, null, 'Output path' ), +\t'port' => array( 'p', true, '3000', 'Server port' ), +); + +$argv = array( '-afv', '--port=8080', '-o', '/tmp/result.txt', 'input.json' ); +list( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); + +echo "input: " . $positionals[0] . "\\n"; +echo "flags: " . implode( ', ', array_keys( array_filter( array( +\t'all' => $options['all'], +\t'force' => $options['force'], +\t'verbose' => $options['verbose'], +) ) ) ) . "\\n"; +echo "output: " . $options['output'] . "\\n"; +echo "port: " . $options['port'] . "\\n";'''))), + ('Validate required options', + '

The parser fills in defaults but never enforces "required". Check for null yourself after parsing, which also gives you full control over the error message.

', + ('require-options.php', php('''use WordPress\\CLI\\CLI; + +$option_defs = array( +\t'site-url' => array( 'u', true, null, 'Public site URL (required)' ), +\t'site-path' => array( null, true, null, 'Target directory (required)' ), +); + +$argv = array( '--site-url', 'https://mysite.test' ); + +try { +\tlist( , $options ) = CLI::parse_command_args_and_options( $argv, $option_defs ); +\tforeach ( array( 'site-url', 'site-path' ) as $name ) { +\t\tif ( null === $options[ $name ] ) { +\t\t\tthrow new RuntimeException( "Missing required option --{$name}" ); +\t\t} +\t} +\techo "All good.\\n"; +} catch ( Exception $e ) { +\techo "error: " . $e->getMessage() . "\\n"; +}'''))), + ('Generate --help from definitions', + '

Because each option carries its own description, you can render help text by walking the same definitions you parse with. No second source of truth.

', + ('help-text.php', php('''use WordPress\\CLI\\CLI; + +$option_defs = array( +\t'output' => array( 'o', true, null, 'Write result to FILE' ), +\t'force' => array( 'f', false, false, 'Overwrite existing files' ), +\t'verbose' => array( 'v', false, false, 'Verbose output' ), +\t'help' => array( 'h', false, false, 'Show this help and exit' ), +); + +function render_help( array $defs ) { +\techo "Usage: mytool [options] \\n\\nOptions:\\n"; +\tforeach ( $defs as $long => $def ) { +\t\tlist( $short, $has_value, $default, $desc ) = $def; +\t\t$flag = ( $short ? "-{$short}, " : ' ' ) . "--{$long}"; +\t\tif ( $has_value ) $flag .= '=VALUE'; +\t\techo sprintf( " %-28s %s\\n", $flag, $desc ); +\t} +} + +list( , $options ) = CLI::parse_command_args_and_options( array( '-h' ), $option_defs ); +if ( $options['help'] ) render_help( $option_defs );'''))), + ('Git-style subcommands', + '

To build a tool with subcommands like mytool deploy, peel the first positional off argv, dispatch, and parse the rest with a per-command option set.

', + ('subcommands.php', php('''use WordPress\\CLI\\CLI; + +$commands = array( +\t'deploy' => array( +\t\t'env' => array( 'e', true, 'staging', 'Target environment' ), +\t\t'dry-run' => array( 'n', false, false, 'Preview without applying' ), +\t), +\t'rollback' => array( +\t\t'to' => array( 't', true, null, 'Revision to roll back to' ), +\t), +); + +function run( array $argv, array $commands ) { +\tif ( empty( $argv ) ) { +\t\techo "Usage: mytool [options]\\nCommands: " . implode( ', ', array_keys( $commands ) ) . "\\n"; +\t\treturn; +\t} +\t$command = array_shift( $argv ); +\tif ( ! isset( $commands[ $command ] ) ) { +\t\techo "Unknown command: {$command}\\n"; +\t\treturn; +\t} +\tlist( $positionals, $options ) = CLI::parse_command_args_and_options( $argv, $commands[ $command ] ); +\techo "command={$command}\\n"; +\techo "options: " . json_encode( $options ) . "\\n"; +\techo "positionals: " . json_encode( $positionals ) . "\\n"; +} + +run( array( 'deploy', '--env=production', '-n', 'web-01', 'web-02' ), $commands ); +echo "---\\n"; +run( array( 'rollback', '-t', 'abc123' ), $commands );'''))), + ])) + +# =========================================================================== +# Polyfill +# =========================================================================== +COMPONENTS.append(('polyfill', 'Polyfill', + 'PHP 8 string functions on PHP 7.2+, WordPress hook stubs, and translation/escaping passthroughs so toolkit code runs without WordPress.', + 'wp-php-toolkit/polyfill', + [ + ('Why this exists', + '

A lot of WordPress-adjacent code wants to call esc_html(), __(), or apply_filters() without booting WordPress. The polyfill component provides minimal but real implementations so that code runs unchanged outside WordPress, and stays out of the way when WordPress is loaded (every function uses function_exists() guards).

', + None), + ('PHP 8 string functions on PHP 7.2', + '

The polyfills define the PHP 8.0 string helpers str_contains, str_starts_with, and str_ends_with, plus array_key_first (added in PHP 7.3), only when missing.

', + ('php8-strings.php', php('''var_dump( str_starts_with( '/var/www/html', '/var' ) ); +var_dump( str_ends_with( 'image.png', '.png' ) ); +var_dump( str_contains( 'WordPress Toolkit', 'Toolkit' ) ); + +$first_key = array_key_first( array( 'alpha' => 1, 'beta' => 2 ) ); +echo "first key: {$first_key}\\n";'''))), + ('Escaping and translation stubs', + '

Pass-through implementations let you write code that looks WordPressy and runs anywhere.

', + ('wp-stubs.php', php('''echo __( 'Hello, world' ) . "\\n"; +echo esc_html( '' ) . "\\n"; +echo esc_attr( 'a "quoted" value' ) . "\\n"; +echo esc_url( 'https://example.com/?a=1&b=2' ) . "\\n";'''))), + ('A simple filter chain', + '

The hook system is a real implementation of the WordPress filter API: registered callbacks get applied in priority order, and each one transforms the running value.

', + ('filter-chain.php', php('''add_filter( 'sanitize_title', 'trim' ); +add_filter( 'sanitize_title', 'strtolower' ); +add_filter( 'sanitize_title', function ( $title ) { +\treturn preg_replace( '/\\s+/', '-', $title ); +} ); + +echo apply_filters( 'sanitize_title', ' My Post Title ' ) . "\\n";'''))), + ('Priority ordering and multi-arg passing', + '

Lower priority numbers run first. The fourth argument to add_filter sets how many arguments the callback receives: the filtered value plus any extra context values passed to apply_filters.

', + ('priority-args.php', php('''add_filter( 'render_price', function ( $html, $price, $currency ) { +\treturn $html . " ({$currency} markup)"; +}, 30, 3 ); + +add_filter( 'render_price', function ( $html, $price ) { +\treturn "{$html}"; +}, 10, 2 ); + +add_filter( 'render_price', function ( $html, $price, $currency ) { +\tif ( 'EUR' === $currency ) return $html . ' EUR'; +\treturn $html . " {$currency}"; +}, 20, 3 ); + +echo apply_filters( 'render_price', '19.99', 19.99, 'EUR' ) . "\\n";'''))), + ('Hook-based extension points in standalone libraries', + '

Use do_action and apply_filters as cheap extension points in your own code, without depending on WordPress.

', + ('library-hooks.php', php('''class ImportPipeline { +\tpublic function process( array $row ) { +\t\t$row = apply_filters( 'import_pipeline_normalize', $row ); +\t\tdo_action( 'import_pipeline_row_processed', $row ); +\t\treturn $row; +\t} +} + +add_filter( 'import_pipeline_normalize', function ( $row ) { +\t$row['email'] = strtolower( trim( $row['email'] ) ); +\treturn $row; +} ); + +$log = array(); +add_action( 'import_pipeline_row_processed', function ( $row ) use ( &$log ) { +\t$log[] = $row['email']; +} ); + +$pipeline = new ImportPipeline(); +$pipeline->process( array( 'email' => ' USER@EXAMPLE.COM ' ) ); +$pipeline->process( array( 'email' => 'OTHER@example.com' ) ); + +echo implode( "\\n", $log ) . "\\n";'''))), + ])) + +# =========================================================================== +# Blueprints +# =========================================================================== +COMPONENTS.append(('blueprints', 'Blueprints', + 'Declarative WordPress site provisioning. Write a JSON description of plugins, options, and content; let the runner execute it.', + 'wp-php-toolkit/blueprints', + [ + ('Why this exists', + '

A WordPress environment is more than a database dump. It can require a specific core version, plugins, themes, site options, uploaded files, content, and setup steps. Rebuilding that by hand makes demos, tests, bug reports, workshops, and CI fixtures drift over time.

' + '

The Blueprints component treats site setup as data. A blueprint JSON document describes the desired steps, and the runner applies them to either a new WordPress install or an existing one. The validator exists because user-authored JSON needs clear, path-specific errors rather than generic schema failures.

' + '

RunnerConfiguration separates the web root from the WordPress core directory, since real hosts often put them in different places. Both paths are explicit on the runner, never inferred.

' + '

Blueprints can create a new WordPress install (download core, set up the database, apply steps) or apply to an existing site. Creating a fresh install needs filesystem access this in-browser runtime doesn\'t have, so the runnable snippets focus on APPLY_TO_EXISTING_SITE.

', + None), + ('Configure a runner for an existing site', + '

RunnerConfiguration is a fluent builder. The minimum: target site root, target site URL, execution mode.

', + ('configure.php', php('''use WordPress\\Blueprints\\Runner; +use WordPress\\Blueprints\\RunnerConfiguration; + +$config = ( new RunnerConfiguration() ) +\t->set_execution_mode( Runner::EXECUTION_MODE_APPLY_TO_EXISTING_SITE ) +\t->set_target_site_root( '/wordpress' ) +\t->set_target_site_url( 'http://playground.test/' ); + +echo "mode: " . $config->get_execution_mode() . "\\n"; +echo "root: " . $config->get_target_site_root() . "\\n"; +echo "url: " . $config->get_target_site_url() . "\\n";'''))), + ('Generate blueprint JSON from PHP', + '

CI jobs and tests stay clearer when PHP builds the blueprint from data instead of hand-writing JSON. Keep the structure plain: version, then a list of step arrays.

', + ('build-json.php', php('''$site_name = 'Demo Site'; +$plugins = array( 'gutenberg', 'classic-editor' ); + +$blueprint = array( +\t'version' => 2, +\t'steps' => array( +\t\tarray( +\t\t\t'step' => 'setSiteOptions', +\t\t\t'options' => array( +\t\t\t\t'blogname' => $site_name, +\t\t\t\t'permalink_structure' => '/%postname%/', +\t\t\t\t'show_on_front' => 'page', +\t\t\t), +\t\t), +\t), +); + +foreach ( $plugins as $slug ) { +\t$blueprint['steps'][] = array( +\t\t'step' => 'installPlugin', +\t\t'pluginData' => "https://downloads.wordpress.org/plugin/{$slug}.zip", +\t); +\t$blueprint['steps'][] = array( +\t\t'step' => 'activatePlugin', +\t\t'plugin' => "{$slug}/{$slug}.php", +\t); +} + +echo json_encode( $blueprint, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ) . "\\n";'''))), + ('Validate before running', + '

The schema validator returns a human-readable ValidationError instead of a generic "does not match schema" failure. Use it before handing user-authored JSON to a runner.

', + ('validate.php', php('''use WordPress\\Blueprints\\Validator\\HumanFriendlySchemaValidator; + +$schema = array( +\t'type' => 'object', +\t'required' => array( 'version', 'steps' ), +\t'properties' => array( +\t\t'version' => array( 'type' => 'integer' ), +\t\t'steps' => array( +\t\t\t'type' => 'array', +\t\t\t'items' => array( +\t\t\t\t'type' => 'object', +\t\t\t\t'required' => array( 'step' ), +\t\t\t\t'properties' => array( +\t\t\t\t\t'step' => array( 'type' => 'string' ), +\t\t\t\t), +\t\t\t), +\t\t), +\t), +); + +$blueprint = array( +\t'version' => 2, +\t'steps' => array( +\t\tarray( 'pluginData' => 'https://downloads.wordpress.org/plugin/gutenberg.zip' ), +\t), +); + +$error = ( new HumanFriendlySchemaValidator( $schema ) )->validate( $blueprint ); +if ( null === $error ) { +\techo "valid\\n"; +} else { +\techo $error->get_pretty_path() . ": " . $error->message . "\\n"; +}'''))), + ('The Blueprint JSON shape', + '

A blueprint is a JSON document with a version field and a steps array. Each step has a "step" discriminator and step-specific fields. This is the same shape used by WordPress Playground.

' + '
{\n  "version": 2,\n  "steps": [\n    { "step": "setSiteOptions",\n      "options": {\n        "blogname": "Demo Site",\n        "permalink_structure": "/%postname%/"\n      } },\n    { "step": "installPlugin",\n      "pluginData": "https://downloads.wordpress.org/plugin/gutenberg.zip" },\n    { "step": "activatePlugin",\n      "plugin": "gutenberg/gutenberg.php" }\n  ]\n}
', + None), + ])) + +# =========================================================================== +# ToolkitCodingStandards +# =========================================================================== +COMPONENTS.append(('coding-standards', 'ToolkitCodingStandards', + 'PHP_CodeSniffer sniffs used by this project: enforce Yoda comparisons and ban the short ternary where it hides falsy-value bugs.', + 'wp-php-toolkit/toolkit-coding-standards', + [ + ('Why this exists', + '

This package is not a general-purpose style guide. It holds project-specific PHP_CodeSniffer rules for review comments the toolkit wants automated: comparisons should follow the WordPress Yoda style, and short ternaries should not hide whether a fallback is meant for null only or for all falsy values.

' + '

Use it in this monorepo, or in a project that intentionally wants the same review tradeoffs. If your project does not follow WordPress-style comparisons, the Yoda sniff is probably the wrong rule for you.

', + None), + ('Reference the standard from your phpcs.xml', + '

The component is a PHPCS ruleset, so the useful examples are configuration and before/after code rather than runtime snippets. Activate both sniffs at once by referencing WordPressToolkitCodingStandards:

' + '
<?xml version="1.0"?>\n<ruleset name="My Project">\n  <file>src/</file>\n\n  <!-- Activate both toolkit sniffs -->\n  <rule ref="WordPressToolkitCodingStandards"/>\n\n  <!-- Or pick them individually -->\n  <!-- <rule ref="WordPressToolkitCodingStandards.PHP.EnforceYodaComparison"/> -->\n  <!-- <rule ref="WordPressToolkitCodingStandards.PHP.DisallowShortTernary"/> -->\n</ruleset>
' + '

Then run phpcs and phpcbf the usual way:

' + '
vendor/bin/phpcs --standard=phpcs.xml .\nvendor/bin/phpcbf --standard=phpcs.xml .
', + None), + ('EnforceYodaComparison: catches accidental assignment', + '

Yoda comparisons (true === $x) make typo-induced assignments easier to catch and match the WordPress style used throughout the toolkit:

' + '
// Bug: single = inside a condition. Always truthy, mutates $status.\nif ( $status = \'published\' ) {\n    publish_post( $post );\n}\n\n// Yoda style: writing this typo would be a parse error.\nif ( \'published\' === $status ) {\n    publish_post( $post );\n}
' + '

The sniff covers ===, !==, ==, and !=, and stays quiet when both sides are dynamic.

', + None), + ('Why ban the short ternary', + '

Developers confuse the short ternary ($a ?: $b) with the null-coalescing operator ($a ?? $b). They differ on falsy-but-not-null values: 0 ?: \'fallback\' returns \'fallback\', but 0 ?? \'fallback\' returns 0. The sniff bans ?: entirely so reviewers don\'t have to relitigate this on every PR.

', + None), + ('Review-friendly replacements', + '

When the fallback should apply only to null, use ??. When the fallback should apply to every falsy value, write the full ternary so the intent is visible in review.

' + '
// Only missing values fall back. 0 and "" are preserved.\n$limit = $request_limit ?? 20;\n\n// Any falsy value falls back. The duplicated condition is intentional.\n$title = $raw_title ? $raw_title : \'Untitled\';
', + None), + ])) diff --git a/bin/_expected_outputs.json b/bin/_expected_outputs.json new file mode 100644 index 000000000..d63c37235 --- /dev/null +++ b/bin/_expected_outputs.json @@ -0,0 +1,81 @@ +{ + "blockparser::audit-embeds.php": "ok https://twitter.com/wordpress/status/1\nok https://youtube.com/watch?v=abc\nSTALE https://vine.co/v/xyz\n", + "blockparser::count-blocks.php": " 2 core/paragraph\n 1 core/group\n 1 core/heading\n 1 core/image\n", + "blockparser::find-custom-block.php": "1. Jane (5/5): Loved it.\n2. Joe (4/5): Pretty good.\n", + "blockparser::has-block.php": "has button\nmissing gallery\n", + "blockparser::lint-headings.php": "ok Intro: H2\nWARN Subsection: jumped from H2 to H4\nok Body: H3\n", + "blockparser::parse.php": "core/heading: Welcome\ncore/paragraph: Hello from the block editor.\n", + "blueprints::build-json.php": "{\n \"version\": 2,\n \"steps\": [\n {\n \"step\": \"setSiteOptions\",\n \"options\": {\n \"blogname\": \"Demo Site\",\n \"permalink_structure\": \"/%postname%/\",\n \"show_on_front\": \"page\"\n }\n },\n {\n \"step\": \"installPlugin\",\n \"pluginData\": \"https://downloads.wordpress.org/plugin/gutenberg.zip\"\n },\n {\n \"step\": \"activatePlugin\",\n \"plugin\": \"gutenberg/gutenberg.php\"\n },\n {\n \"step\": \"installPlugin\",\n \"pluginData\": \"https://downloads.wordpress.org/plugin/classic-editor.zip\"\n },\n {\n \"step\": \"activatePlugin\",\n \"plugin\": \"classic-editor/classic-editor.php\"\n }\n ]\n}\n", + "blueprints::configure.php": "mode: apply-to-existing-site\nroot: /wordpress\nurl: http://playground.test/\n", + "blueprints::validate.php": "Blueprint root[\"steps\"][0]: Missing required field: step.\n", + "bytestream::deflate-roundtrip.php": "original : 1050 bytes\ndeflated : 45 bytes (4.3%)\nround-trip: OK\n", + "bytestream::limited.php": "body sees: BODY:hello there\nremaining in source: |FOOTER:done\n", + "bytestream::lines.php": "[1] alpha\n[2] bravo\n[3] charlie\n[4] delta\n[5] echo\n", + 
"bytestream::memory-pipe.php": "got: first chunk\nsecond chunk\nthird chunk\n", + "bytestream::teaser-read.php": "Read 1800 bytes in 256-byte chunks.\n", + "cli::help-text.php": "Usage: mytool [options] \n\nOptions:\n -o, --output=VALUE Write result to FILE\n -f, --force Overwrite existing files\n -v, --verbose Verbose output\n -h, --help Show this help and exit\n", + "cli::mix-shapes.php": "input: input.json\nflags: all, force, verbose\noutput: /tmp/.txt\nport: 8080\n", + "cli::parse-flag.php": "verbose: yes\ninput: input.txt\n", + "cli::require-options.php": "error: Missing required option --site-path\n", + "cli::subcommands.php": "command=deploy\noptions: {\"env\":\"production\",\"dry-run\":true}\npositionals: [\"web-01\",\"web-02\"]\n---\ncommand=rollback\noptions: {\"to\":\"abc123\"}\npositionals: []\n", + "dataliberation::build-wxr.php": "items: 2\nterms: 3\nBlog post exported\n", + "dataliberation::md-to-wxr.php": "posts: 2\nblock markup exported\nfrontmatter title exported\n", + "dataliberation::rewrite-urls.php": "new URL present\nold URL removed\n", + "dataliberation::wxr-quickstart.php": "bytes: 475\ntitle exported\nstatus exported\n", + "dataliberation::wxr-read.php": "site_option: {\"option_name\":\"blogname\",\"option_value\":\"Demo\"}\npost: {\"post_title\":\"First\",\"post_id\":\"1\",\"post_type\":\"post\",\"post_content\":\"Body 1\"}\npost: {\"post_title\":\"Second\",\"post_id\":\"2\",\"post_type\":\"post\",\"post_content\":\"Body 2\"}\n", + "encoding::mixed-encoding.php": "#1 ok: Plain ASCII\n#2 ok: Caf\u00e9\n#3 recovered as latin1: caf\u00e9\n#4 recovered as latin1: weird \u00c0 byte\n", + "encoding::noncharacters.php": "normal text: ok\nU+FFFE: reject\nU+FDD0: reject\n", + "encoding::pipeline.php": "good valid=Y noncharacter=N -> Caf\u00e9\nlatin1 valid=N noncharacter=N -> caf\ufffd\noverlong valid=N noncharacter=N -> x\ufffd\ufffdy\nnoncharac valid=Y noncharacter=Y -> hi \ufffe there\n", + "encoding::scrub.php": "the byte \ufffd should not be 
here.\n.\ufffd\ufffd.\n", + "encoding::validate.php": "ASCII: valid\nUTF-8 pencil: valid\nlatin-1 byte: invalid\noverlong slash: invalid\nsurrogate half: invalid\n", + "filesystem::atomic-write.php": "config: {\"v\":2}\nno .tmp leftovers: 1 entries in root\n", + "filesystem::cross-backend-copy.php": "in memory after two copies:\n posts: 2024-01.md\n index:

Home

\n", + "filesystem::local-chroot.php": "Hi from local disk.\nexists after cleanup? no\n", + "filesystem::path-helpers.php": "/var/www/site/index.php\n/a/b\na/c/e\n", + "filesystem::sqlite.php": "post-1.md: # Post 1\npost-2.md: # Post 2\npost-3.md: # Post 3\n", + "filesystem::teaser-memory.php": "Hello, world!", + "filesystem::test-without-disk.php": "{\"version\":\"1.2.4\"}\n", + "git::branches.php": "on experiment: {\"flag\":true}\non trunk: {\"flag\":false}\n", + "git::commit-in-memory.php": "commit: \nHEAD: \nREADME: # My Project\n", + "git::git-filesystem.php": "tree:\n /posts/about.md\n /posts/hello.md\n\nhello.md now:\n# Hello\nSecond draft.\n", + "git::merge-branches.php": "merge head: \nconflicts: none\nresult:\nbuy oat milk\nwalk dog\nread book\nwrite blog post\n", + "git::options-snapshot.php": "Files changed in last snapshot:\n options.json\n", + "git::walk-history.php": " expand examples\n fix typo\n add intro\n", + "html::absolute-links.php": "

See about, x, and contact.

", + "html::bookmarks.php": "
  • Buy milk
  • Walk the dog
  • Read book
", + "html::breadcrumbs.php": "found 2 figure images\n
\"Hero\"
Hero shot

Body copy \"\" mid-paragraph.

\"Diagram\"
", + "html::csp-nonce.php": "nonce: \n\n", + "html::decode-entities.php": "attribute: path?a=1&b=2©\ntext: AT&T \u2014 100% \ud83d\ude00\nbool(false)\n", + "html::lazy-load-images.php": "
\n\t\"Hero\"\n\t

Intro copy.

\n\t\"Inline\"\n
", + "html::outline.php": " H1 Title\n H2 Chapter 1\n H2 Chapter 2\n", + "html::sanitize-html.php": "

Hi friend!

", + "html::srcset-rewrite.php": "
\"Sunset\"
", + "httpclient::parse-response.php": "status: 201 Created\nok: yes\ntype: application/json\nsize: 27 bytes\n", + "httpclient::request-object.php": "POST https://api.example.test/posts\ncontent-type: application/json\ncontent-length: 39\nauthorization: Basic dXNl...\n", + "httpserver::buffered-writer.php": "headers before send:\nContent-Type: text/html\n\nbody:\nHi

Hello

Buffered body, sent at the end.

", + "markdown::count-blocks.php": "core/heading: 1\ncore/paragraph: 2\ncore/table: 1\ncore/code: 1\ncore/quote: 1\n", + "markdown::frontmatter.php": "Title: The Name of the Wind\nStatus: publish\nTags: fantasy, kingkiller\n", + "markdown::migrate-folder.php": "=== roadmap (/tmp//roadmap.md) ===\n\n

Roadmap

\n\n\n\n

Hello world.

\n\n\n...\n\n", + "markdown::quickstart.php": "\n

Hello

\n\n\n\n

Welcome to WordPress.

\n\n\n", + "markdown::roundtrip.php": "## Round trip\n\n- one\n- two\n- three\n\n", + "merge::conflicts.php": "ours: line 2 from Alice\ntheirs: line 2 from Bob\n\n--- merged content with markers ---\nline 1\n\n<<<<<<< HEAD\nline 2 from Alice\n\n=======\nline 2 from Bob\n\n>>>>>>> incoming \n\n", + "merge::git-patch.php": "diff --git a/post.yml b/post.yml\n--- a/post.yml\n+++ b/post.yml\n@@ -1,4 +1,5 @@- title: Hello\n+ title: Hello, world\n author: Alice\n- status: draft\n+ status: published\n+ tags: greeting\n \n", + "merge::line-diff.php": "= alpha\n- beta\n+ BETA\n= gamma\n+ delta\n= \n", + "merge::sync-folder-vs-db.php": "=== hello.md ===\n(conflict \u2014 needs review)\n# Hello\n\n<<<<<<< HEAD\nDraft body, expanded on disk.\n\n=======\nNew section from the editor.\n\n>>>>>>> incoming \n\n\n=== about.md ===\n(conflict \u2014 needs review)\n# About\n\n<<<<<<< HEAD\nWho *they* are.\n\n=======\nWho we really are.\n\n>>>>>>> incoming \n\n\n", + "merge::three-way.php": "clean merge:\nintro updated\nbody\noutro\nappendix\n\n", + "polyfill::filter-chain.php": "my-post-title\n", + "polyfill::library-hooks.php": "user@example.com\nother@example.com\n", + "polyfill::php8-strings.php": "bool(true)\nbool(true)\nbool(true)\nfirst key: alpha\n", + "polyfill::priority-args.php": "19.99 EUR (EUR markup)\n", + "polyfill::wp-stubs.php": "Hello, world\n<script>alert("xss")</script>\na "quoted" value\nhttps://example.com/?a=1&b=2\n", + "xml::bump-prices.php": "PHP InternalsWordPress at Scale", + "xml::opml.php": "Hacker News\thttps://news.ycombinator.com/rss\nLWN\thttps://lwn.net/headlines/rss\nWordPress\thttps://wordpress.org/news/feed/\n", + "xml::rewrite-wxr-urls.php": "rewrote 3 text nodes\n\nhttps://new.example.comhttps://new.example.com/2024/post-1https://new.example.com/?p=1", + "xml::wxr-namespaces.php": "title: Hello World\ndc/creator: admin\nwp/post_id: 42\nwp/status: publish\n", + "zip::epub.php": "mimetype: application/epub+zip\nsize on disk: 839 bytes\n", + 
"zip::repack.php": "new config.json: {\"debug\":true,\"version\":\"1.0.1\"}\nuntouched: etc/passwd\n./safe/path.txt => ./safe/path.txt\na/../../b/secret => a/../b/secret\na//b///c.txt => a/b/c.txt\n../../../../root/.ssh/authorized_keys => root/.ssh/authorized_keys\n", + "zip::zip-to-memfs.php": "files now in memory:\n /app/README.md\n /app/VERSION\n /app/assets/style.css\n /app/index.php\n /app/lib/util.php\n" +} diff --git a/bin/build-docs-bundle.sh b/bin/build-docs-bundle.sh new file mode 100755 index 000000000..ce6eb195d --- /dev/null +++ b/bin/build-docs-bundle.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Rebuilds docs/assets/php-toolkit.zip and regenerates the docs HTML pages. +# Run this whenever components/ changes or the docs page generator (bin/build-docs.py) +# changes. +set -euo pipefail + +cd "$(dirname "$0")/.." + +echo "==> composer install --no-dev --optimize-autoloader" +composer install --no-dev --optimize-autoloader --quiet + +echo "==> bundling docs/assets/php-toolkit.zip" +rm -f docs/assets/php-toolkit.zip +zip -qr docs/assets/php-toolkit.zip components vendor bootstrap.php composer.json \ + -x "*/Tests/*" "*/tests/*" "*/.git/*" "*/.github/*" "*/node_modules/*" + +echo "==> regenerating legacy docs/_legacy/*/index.html" +python3 bin/build-docs.py + +echo "==> regenerating docs/reference/*.html" +python3 bin/build-reference.py + +echo "Done. docs/assets/php-toolkit.zip = $(du -h docs/assets/php-toolkit.zip | cut -f1)" diff --git a/bin/build-docs.py b/bin/build-docs.py new file mode 100755 index 000000000..ede776442 --- /dev/null +++ b/bin/build-docs.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 +""" +Generates docs//index.html for every component plus the docs/index.html +landing page. The component catalog lives in bin/_docs_components.py so that +content and orchestration stay separate. 
+""" + +import json +import os +import re +import sys +from html import escape as h + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from _docs_components import ( + COMPONENTS, + COMPONENT_GUIDES, + COMPONENT_RELATIONS, + STARTER_PATHS, +) + +DOCS = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'docs', '_legacy') +EXPECTED_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '_expected_outputs.json') +ASSET_VERSION = '20260429-concept-guide' + +EXPECTED = {} +if os.path.exists(EXPECTED_PATH): + with open(EXPECTED_PATH) as f: + EXPECTED = {tuple(k.split('::')): v for k, v in json.load(f).items()} + +PAGE_HEAD = ''' + + + + +{title} — PHP Toolkit + + + + + + + +
+\tPHP Toolkit +\t +
+''' + +PAGE_FOOT = ''' + + +''' + + +def snippet_block(slug, name, code, runnable=True): + # \n' + ) + runnable_attr = '' if runnable else ' runnable="false"' + return ( + f'\n' + f'\n' + f'{expected_block}' + f'\n' + ) + + +def render_example(slug, snippet): + name, code = snippet[0], snippet[1] + runnable = len(snippet) < 3 or snippet[2] + if not runnable: + return snippet_block(slug, name, code, False) + return snippet_block(slug, name, code, True) + + +def slugify(text): + return re.sub(r'[^\w\s-]', '', text.lower()).strip().replace(' ', '-') + + +def render_component(slug, title, lede, install, sections): + nav_items = [] + for s, t, _, _, _ in COMPONENTS: + cls = ' class="current"' if s == slug else '' + nav_items.append(f'\t\t\t{h(t)}') + sidebar = ( + '\t\n' + ) + + out = [PAGE_HEAD.format( + title=h(title), + description=h(re.sub(r'<[^>]+>', '', lede)), + asset_version=ASSET_VERSION, + )] + out.append('
\n') + out.append(sidebar) + out.append('\t
\n') + out.append(f'\t\t

{h(title)}

\n') + out.append(f'\t\t

{lede}

\n') + if install: + out.append(f'\t\tcomposer require {h(install)}\n') + + purpose = None + usage_sections = sections + if sections and sections[0][0].lower() == 'why this exists': + purpose = sections[0] + usage_sections = sections[1:] + + if purpose: + _, body_html, snippet = purpose + if body_html: + out.append(f'\t\t{body_html}\n') + if snippet: + out.append(render_example(slug, snippet)) + + guide = COMPONENT_GUIDES.get(slug, {}) + if guide: + mental_model = guide.get('mental_model') + journey = guide.get('journey', ()) + if mental_model: + out.append(f'\t\t{mental_model}\n') + if journey: + out.append('\t\t

You will learn to:

\n') + out.append('\t\t
    \n') + for label, _text in journey: + out.append(f'\t\t\t
  • {h(label)}
  • \n') + out.append('\t\t
\n') + + if install: + out.append( + '\t\t

Most snippets below run in the browser through WordPress Playground. ' + 'Click Run on any example to execute it; edit the code and run again to see what changes. ' + 'Static snippets show config or shell commands that need a real local environment.

\n' + ) + + for heading, body_html, snippet in usage_sections: + out.append(f'\t\t

{h(heading)}

\n') + if body_html: + out.append(f'\t\t{body_html}\n') + if snippet: + out.append(render_example(slug, snippet)) + + related = COMPONENT_RELATIONS.get(slug, ()) + if related: + out.append('\t\t

See also

\n') + out.append('\t\t\n') + out.append('\t
\n
\n') + out.append(PAGE_FOOT) + return ''.join(out) + + +def render_index(): + title_by_slug = {slug: title for slug, title, _, _, _ in COMPONENTS} + cards = [] + for slug, title, lede, _, _ in COMPONENTS: + clean = re.sub(r'<[^>]+>', '', lede) + first = clean.split('.')[0] + if len(first) > 110: + first = first[:107].rsplit(' ', 1)[0] + '…' + suffix = '' if first.endswith(('…', '.')) else '.' + cards.append( + f'\t\t
  • {h(title)}' + f'{h(first)}{suffix}
  • ' + ) + cards_html = '\n'.join(cards) + path_cards = [] + for title, description, slugs in STARTER_PATHS: + links = ' '.join( + f'{h(title_by_slug[slug])}' for slug in slugs + ) + path_cards.append( + f'\t\t
  • {h(title)}{h(description)}' + f'
  • ' + ) + paths_html = '\n'.join(path_cards) + return f''' + + + + +PHP Toolkit — runnable docs + + + + +
    +\tPHP Toolkit +\t +
    +
    +\t

    PHP Toolkit

    +\t

    Eighteen standalone pure-PHP libraries for WordPress and general PHP, with no extension or Composer dependencies. Each guide starts with the story for that component, outlines the route through the page, names the main APIs, and then uses examples only where code clarifies the idea.

    + +\t

    Choose a Path

    +\t
      +{paths_html} +\t
    + +\t

    Components

    +\t
      +{cards_html} +\t
    + +\t

    How these examples work

    +\t

    Most PHP examples embed <php-snippet> elements from WordPress Playground. The first Run click on a page boots a single shared PHP+WordPress runtime in your browser via WebAssembly and unzips the toolkit into it. Subsequent snippets reuse the same runtime, so only the first run pays the boot cost.

    +\t

    Examples that need a local listening port, a web server, or deployment-specific config are presented as static code blocks so the page does not imply they can run in the browser sandbox.

    +\t

    The toolkit bundle (docs/assets/php-toolkit.zip, ≈1.8 MB) ships with the docs, so no third-party CDN is involved.

    +
    + + + +''' + + +def main(): + with open(os.path.join(DOCS, 'index.html'), 'w') as f: + f.write(render_index()) + + for slug, title, lede, install, sections in COMPONENTS: + out_dir = os.path.join(DOCS, slug) + os.makedirs(out_dir, exist_ok=True) + with open(os.path.join(out_dir, 'index.html'), 'w') as f: + f.write(render_component(slug, title, lede, install, sections)) + print(f' wrote {slug}/index.html') + + +if __name__ == '__main__': + main() diff --git a/bin/build-reference.py b/bin/build-reference.py new file mode 100644 index 000000000..bc95c8991 --- /dev/null +++ b/bin/build-reference.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +"""Generates docs/reference/.html for components not already hand-written. +Pulls catalog data from _docs_components.py and emits the concept-guide shape: +lede + install + context paragraphs + minimal example + refinements + pitfalls + see also. + +The hand-written reference pages (html, zip) are skipped — they live as +authored HTML files and we don't overwrite them. +""" + +import json +import os +import re +import sys +from html import escape as h, unescape + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from _docs_components import COMPONENTS, COMPONENT_RELATIONS, CREDITS + +DOCS = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'docs', 'reference') +EXPECTED_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '_expected_outputs.json') +ASSET_VERSION = '20260429-rewrite' + +EXPECTED = {} +if os.path.exists(EXPECTED_PATH): + with open(EXPECTED_PATH) as f: + EXPECTED = {tuple(k.split('::')): v for k, v in json.load(f).items()} + +# Skip the hand-written ones. +SKIP = {'html', 'zip'} + +PAGE_HEAD = ''' + + + + +{title} — PHP Toolkit reference + + + + + + + +
    +\tPHP Toolkit +\t +
    + +
    +''' + +PAGE_FOOT = '''\t +
    + + + + +''' + + +def slugify(text): + return re.sub(r'[^\w\s-]', '', text.lower()).strip().replace(' ', '-') + + +def split_pitfalls(body_html): + """Pull out paragraphs that begin with 'Footgun:' or 'Gotcha:' and return them + as separate pitfall callouts. Return (rest_html, [pitfall_html, ...]).""" + pitfalls = [] + rest = [] + for chunk in re.findall(r'

    .*?

    ', body_html, flags=re.DOTALL): + plain = re.sub(r'<[^>]+>', '', chunk).strip() + if plain.lower().startswith(('footgun', 'gotcha')): + inner = chunk[3:-4] # strip

    ...

    + inner = re.sub(r'^(Footgun|Gotcha)[^<]*\s*[—:.\s]*', '', inner) + inner = re.sub(r'^(Footgun|Gotcha)[^a-z<]*', '', inner) + pitfalls.append(inner.strip()) + else: + rest.append(chunk) + return ''.join(rest), pitfalls + + +def snippet_block(slug, name, code, runnable=True): + safe = code.rstrip().replace('\n{expected_safe}\n\n' + ) + runnable_attr = '' if runnable else ' runnable="false"' + return ( + f'\n' + f'\n' + f'{expected_block}' + f'\n' + ) + + +def render_example(slug, snippet): + name, code = snippet[0], snippet[1] + runnable = len(snippet) < 3 or snippet[2] + return snippet_block(slug, name, code, runnable) + + +def sidebar(current_slug): + items = [] + for slug, title, _, _, _ in COMPONENTS: + is_legacy = slug in SKIP or slug in { + 'bytestream', 'filesystem', 'blockparser', 'markdown', 'xml', 'encoding', + 'dataliberation', 'git', 'merge', 'httpclient', 'httpserver', 'corsproxy', + 'cli', 'polyfill', 'blueprints', 'coding-standards', + } + # Reference page exists for skipped (handwritten) and the ones we generate here. + href = f'{slug}.html' + cls = ' class="current"' if slug == current_slug else '' + items.append(f'\t\t\t{h(title)}') + return ( + '\t\n' + ) + + +def render_component(slug, title, lede, install, sections): + # Separate the "Why this exists" intro from the worked sections. + purpose_html = '' + pitfalls_from_purpose = [] + usage = sections + if sections and sections[0][0].lower() == 'why this exists': + _, body, _ = sections[0] + purpose_html, pitfalls_from_purpose = split_pitfalls(unescape(body or '')) + usage = sections[1:] + + out = [PAGE_HEAD.format( + title=h(title), + description=h(re.sub(r'<[^>]+>', '', lede)), + asset_version=ASSET_VERSION, + )] + out.append(sidebar(slug)) + out.append('\t
    \n\n') + out.append(f'

    {h(title)}

    \n\n') + out.append(f'

    {lede}

    \n\n') + if install: + out.append(f'
    composer require {h(install)}
    \n\n') + if slug in CREDITS: + title_credit, body_credit = CREDITS[slug] + out.append( + '\n\n' + ) + if purpose_html: + out.append(unescape(purpose_html) + '\n\n') + + # Worked examples + accumulated pitfalls. + pitfalls = list(pitfalls_from_purpose) + minimal_emitted = False + for heading, body_html, snippet in usage: + # Pull pitfalls out of section body too. + rest, found = split_pitfalls(unescape(body_html or '')) + pitfalls.extend(found) + h2 = heading + if not minimal_emitted and snippet: + h2 = 'A minimal example' + minimal_emitted = True + elif snippet: + h2 = f'Refinement: {heading[0].lower() + heading[1:]}' if heading else heading + out.append(f'

    {h(h2)}

    \n\n') + if rest: + out.append(rest + '\n\n') + if snippet: + out.append(render_example(slug, snippet) + '\n') + + if pitfalls: + out.append('

    Pitfalls

    \n\n') + for p in pitfalls: + out.append(f'\n\n') + + related = COMPONENT_RELATIONS.get(slug, ()) + if related: + out.append('

    See also

    \n\n') + out.append('\n\n') + + out.append(PAGE_FOOT) + return ''.join(out) + + +def main(): + os.makedirs(DOCS, exist_ok=True) + for slug, title, lede, install, sections in COMPONENTS: + if slug in SKIP: + continue + out = render_component(slug, title, lede, install, sections) + path = os.path.join(DOCS, f'{slug}.html') + with open(path, 'w') as f: + f.write(out) + print(f'wrote reference/{slug}.html') + + +if __name__ == '__main__': + main() diff --git a/bin/run-snippets.py b/bin/run-snippets.py new file mode 100755 index 000000000..950f37747 --- /dev/null +++ b/bin/run-snippets.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Runs every PHP snippet in bin/_docs_components.py against the local +toolkit (`composer install` first, so vendor/autoload.php exists) and +captures stdout. Used in two ways: + + bin/run-snippets.py --update Regenerate bin/_expected_outputs.json + from the snippets that ran successfully. + bin/run-snippets.py --check Run every snippet, compare against the + committed JSON. Exit nonzero on drift. + Used by .github/workflows/snippet-tests.yml. + +Snippets reference '/wordpress/wp-content/php-toolkit/vendor/autoload.php' — +the path that exists inside Playground. The runner rewrites that to the +repo's local vendor/autoload.php before executing. + +Snippets marked non-runnable in the catalog are skipped. Snippets that need +WordPress, network access, or a listening TCP port may run locally but avoid +committing expected output because their stdout is environment-dependent. 
+""" + +import argparse +import json +import os +import re +import subprocess +import sys +import tempfile + +THIS = os.path.dirname(os.path.abspath(__file__)) +ROOT = os.path.dirname(THIS) +sys.path.insert(0, THIS) +from _docs_components import COMPONENTS # noqa: E402 + +VENDOR_AUTOLOAD = os.path.join(ROOT, 'vendor', 'autoload.php') +EXPECTED_PATH = os.path.join(THIS, '_expected_outputs.json') + +# Snippets that can run but whose output isn't stable (real network, timestamps, +# host-specific values). They're verified to exit 0 but their stdout isn't +# captured into the JSON, so the docs page boots Playground at click time. +NO_EXPECTED = { + ('httpclient', 'get.php'), + ('httpclient', 'post.php'), + ('httpclient', 'progress.php'), + ('httpclient', 'sliding-window.php'), + ('httpclient', 'resume-download.php'), + ('httpclient', 'stream-unzip.php'), + ('httpclient', 'fan-out.php'), + ('httpclient', 'stream-to-disk.php'), +} + +PLAYGROUND_AUTOLOAD = "/wordpress/wp-content/php-toolkit/vendor/autoload.php" + +# Tiny polyfill so WordPress-only globals don't break local runs. +# Injected after the autoload require so WP_Block_Parser exists. +LOCAL_PRELUDE = """ +if ( ! 
function_exists( 'parse_blocks' ) ) { +\tfunction parse_blocks( $content ) { +\t\treturn ( new WP_Block_Parser() )->parse( $content ); +\t} +} +""" + + +def rewrite(code): + code = code.replace(PLAYGROUND_AUTOLOAD, VENDOR_AUTOLOAD) + match = re.search(r"require\s+'[^']*vendor/autoload\.php';", code) + if match: + insert_at = match.end() + code = code[:insert_at] + LOCAL_PRELUDE + code[insert_at:] + return code + + +def run_one(code, timeout=15): + with tempfile.NamedTemporaryFile(suffix='.php', mode='w', delete=False) as f: + f.write(rewrite(code)) + path = f.name + try: + proc = subprocess.run( + ['php', '-d', 'display_errors=stderr', path], + capture_output=True, text=True, timeout=timeout, + ) + return proc.returncode, proc.stdout, proc.stderr + except subprocess.TimeoutExpired: + return -1, '', f'TIMEOUT after {timeout}s' + finally: + try: + os.unlink(path) + except OSError: + pass + + +def normalize(text): + """Strip noise that varies between runs (tempfile names, timestamps).""" + # tempnam paths + text = re.sub(r'/tmp/\w+\.zip', '/tmp/.zip', text) + text = re.sub(r'(/tmp/\w+)(\.epub|\.tmp\.[a-f0-9]+)?', r'/tmp/\2', text) + text = re.sub(r'sys_get_temp_dir\(\) \. \'/[^\']+', "sys_get_temp_dir() . 
'/", text) + # uniqid suffixes from sys_get_temp_dir paths in code + text = re.sub(r'/(toolkit|atomic|copytree|big|orig|repacked|app|book|demo|sample|hash|gz|dl)-[a-f0-9]+', r'/\1-XXXXXX', text) + # Random nonces / hex strings + text = re.sub(r'\bnonce(?:: |=")([0-9a-f]{16})"?', lambda m: m.group(0).replace(m.group(1), ''), text) + text = re.sub(r'\bcommit: [0-9a-f]{40}\b', 'commit: ', text) + text = re.sub(r'\bHEAD:\s+[0-9a-f]{40}', 'HEAD: ', text) + text = re.sub(r'\boid: [0-9a-f]{40}\b', 'oid: ', text) + text = re.sub(r'merge head: [0-9a-f]{40}', 'merge head: ', text) + text = re.sub(r'\b[a-f0-9]{7} ', ' ', text) + # Memory numbers + text = re.sub(r'Peak memory: [\d.]+ MB', 'Peak memory: MB', text) + return text + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--update', action='store_true', help='Regenerate _expected_outputs.json') + ap.add_argument('--check', action='store_true', help='Verify against _expected_outputs.json') + ap.add_argument('--filter', default=None, help='Only run snippets whose slug or filename match this substring') + args = ap.parse_args() + + if not args.update and not args.check: + args.check = True + + if not os.path.exists(VENDOR_AUTOLOAD): + print(f'ERROR: {VENDOR_AUTOLOAD} not found. Run `composer install` first.', file=sys.stderr) + sys.exit(2) + + existing = {} + if os.path.exists(EXPECTED_PATH): + with open(EXPECTED_PATH) as f: + existing = {tuple(k.split('::')): v for k, v in json.load(f).items()} + + new = {} + failures = [] + skipped = 0 + matched = 0 + drift = [] + + for slug, _, _, _, sections in COMPONENTS: + for heading, _, snippet in sections: + if not snippet: + continue + filename, code = snippet[0], snippet[1] + runnable = len(snippet) < 3 or snippet[2] + if not runnable: + continue + if args.filter and args.filter not in slug and args.filter not in filename: + continue + rc, stdout, stderr = run_one(code) + if rc != 0: + # Snippet can't run locally — leave it out of JSON. 
The docs + # site will boot Playground for it at click time. + failures.append((slug, filename, stderr.strip().splitlines()[:2])) + skipped += 1 + continue + + key = (slug, filename) + if key in NO_EXPECTED: + # Ran successfully but we don't compare output. Don't store. + matched += 1 + continue + + normalized = normalize(stdout) + new[key] = normalized + + if args.check: + expected = existing.get(key) + if expected is None: + drift.append((slug, filename, 'NEW (run --update to add)')) + elif normalize(expected) != normalized: + drift.append((slug, filename, 'OUTPUT CHANGED')) + else: + matched += 1 + else: + matched += 1 + + print(f'\nRan {matched + len(drift)} snippets; {skipped} couldn\'t run locally.') + for slug, filename, why in failures: + why_text = ' '.join(why) if why else '(no stderr)' + print(f' skip {slug}/{filename:<32} {why_text[:80]}') + if args.check: + for slug, filename, kind in drift: + print(f' DRIFT {slug}/{filename:<32} {kind}') + + if args.update: + joined = {f'{k[0]}::{k[1]}': v for k, v in sorted(new.items())} + with open(EXPECTED_PATH, 'w') as f: + json.dump(joined, f, indent=2, sort_keys=True) + f.write('\n') + print(f'\nWrote {len(joined)} expected outputs to {EXPECTED_PATH}') + sys.exit(0) + + if drift: + print(f'\n{len(drift)} snippet(s) drifted. Run `bin/run-snippets.py --update` to refresh.') + sys.exit(1) + print('\nAll snippets match expected outputs.') + + +if __name__ == '__main__': + main() diff --git a/bin/serve-docs.py b/bin/serve-docs.py new file mode 100755 index 000000000..da64cc89c --- /dev/null +++ b/bin/serve-docs.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +""" +Local dev server for docs/. Adds CORS headers so the WordPress Playground +iframe can fetch docs/assets/php-toolkit.zip across origins. + +GitHub Pages serves Access-Control-Allow-Origin: * by default, so this +server is only needed for `python3 -m http.server`-equivalent local previews. 
+ +Usage: + python3 bin/serve-docs.py [port] +""" + +import http.server +import os +import sys + +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8787 +DOCS = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'docs') + + +class CorsHandler(http.server.SimpleHTTPRequestHandler): + def end_headers(self): + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Headers', '*') + super().end_headers() + + +os.chdir(DOCS) +print(f'Serving {DOCS} on http://localhost:{PORT}/') +http.server.ThreadingHTTPServer(('', PORT), CorsHandler).serve_forever() diff --git a/components/BlockParser/README.md b/components/BlockParser/README.md index 2cf95fb97..fa0d7b2b6 100644 --- a/components/BlockParser/README.md +++ b/components/BlockParser/README.md @@ -1,218 +1,137 @@ # BlockParser -A standalone extraction of WordPress core's block parser. It takes a document containing WordPress block markup (`...`) and returns a structured array of parsed blocks with their attributes, inner HTML, inner blocks, and content interleaving. This is the same parser that powers `parse_blocks()` in WordPress core, packaged as an independent library with no WordPress dependency. +## Why this exists -## Installation - -``` -composer require wp-php-toolkit/blockparser -``` - -## Quick Start - -```php -$document = << -

    Welcome

    - +WordPress stores post content as annotated HTML. Instead of inventing a separate file format, it embeds block boundaries directly inside HTML comments: +```html -

    Hello from the block editor.

    +

    Hello, world.

    -HTML; -$parser = new WP_Block_Parser(); -$blocks = $parser->parse( $document ); - -foreach ( $blocks as $block ) { - if ( 'core/heading' === $block['blockName'] ) { - echo 'Found heading: ' . strip_tags( $block['innerHTML'] ); - // "Found heading: Welcome" - } -} + +
    + ``` -## Usage - -### Parsing a Document +Every WordPress editor, REST API response, and block renderer needs to turn that serialized markup into a structured tree. WordPress core ships `WP_Block_Parser` to do exactly that — but it's buried inside WordPress itself, tied to the full WordPress load. This component extracts it so you can parse block markup anywhere: CLI tools, build scripts, data-migration pipelines, standalone PHP apps — without booting WordPress. -Call `parse()` with any string containing block markup. It returns an array of block arrays, each with the following keys: +## How it works -```php -$parser = new WP_Block_Parser(); -$blocks = $parser->parse( $document ); - -// Each element in $blocks is an array: -// array( -// 'blockName' => 'core/paragraph', // Fully-qualified block name, or null for freeform HTML. -// 'attrs' => array(), // Attributes from the block comment delimiter. -// 'innerBlocks' => array(), // Nested blocks (same structure, recursive). -// 'innerHTML' => '

    Text

    ', // The HTML inside the block, with inner blocks removed. -// 'innerContent' => array( '

    Text

    ' ), // Interleaved HTML strings and null markers for inner block positions. -// ) -``` +The parser is a single-pass, stack-based scanner. It moves forward through the document looking for HTML comments that follow the block annotation pattern. When it finds an opening comment like ``, it: -### Block Types +1. Decodes the JSON attributes from the comment body. +2. Pushes a frame onto a stack, recording the block name, attributes, and the byte offset where the block started. +3. Keeps scanning, collecting the raw HTML between the opening and closing comments as `innerHTML`. +4. If it encounters another `` before the closing comment, it recurses — pushing a new frame for the inner block. +5. When it finds a closing comment (``), it pops the frame, attaches any collected inner blocks, and appends the completed block to its parent. -The parser recognizes three kinds of block tokens: +Freeform content between blocks — plain HTML with no block annotations — becomes a "classic block" with `blockName` set to `null`. -**Standard blocks** have an opener and closer: +The `innerContent` array is the most subtle part of the output. It interleaves child block positions with raw HTML chunks, letting renderers reconstruct the exact original layout. This is how the columns block describes which raw HTML wraps each inner column. -```php -$blocks = ( new WP_Block_Parser() )->parse( - '

    Hello

    ' -); -// $blocks[0]['blockName'] === 'core/paragraph' -// $blocks[0]['innerHTML'] === '

    Hello

    ' -``` +## Usage -**Self-closing (void) blocks** end with `/-->`: +### Parse a post's block content ```php -$blocks = ( new WP_Block_Parser() )->parse( - '' -); -// $blocks[0]['blockName'] === 'core/spacer' -// $blocks[0]['attrs'] === array( 'height' => '50px' ) -// $blocks[0]['innerHTML'] === '' -``` +use WordPress\BlockParser\WP_Block_Parser; -**Freeform HTML** is any content outside of block delimiters: +$parser = new WP_Block_Parser(); +$blocks = $parser->parse( $post_content ); -```php -$blocks = ( new WP_Block_Parser() )->parse( - '

    Just some HTML, no blocks here.

    ' -); -// $blocks[0]['blockName'] === null -// $blocks[0]['innerHTML'] === '

    Just some HTML, no blocks here.

    ' +foreach ( $blocks as $block ) { + echo $block['blockName']; // e.g. "core/paragraph" + echo $block['innerHTML']; // the raw HTML inside the block + // $block['attrs'] — decoded JSON attributes + // $block['innerBlocks'] — nested blocks (same structure, recursive) + // $block['innerContent'] — interleaved HTML chunks + child-block slots +} ``` -### Block Attributes +### Inspect block attributes -Attributes are encoded as JSON inside the block comment delimiter. The parser decodes them into a PHP associative array: +Attributes are encoded as JSON in the opening comment and decoded automatically: ```php -$blocks = ( new WP_Block_Parser() )->parse( - '' . - '
    ' . - '' -); - -$attrs = $blocks[0]['attrs']; -// array( -// 'id' => 123, -// 'sizeSlug' => 'large', -// 'linkDestination' => 'none', -// ) +$markup = '' + . '
    ...
    ' + . ''; + +$blocks = $parser->parse( $markup ); +echo $blocks[0]['attrs']['sizeSlug']; // "large" ``` -### Nested Blocks +### Walk a nested block tree -Blocks can contain other blocks. Inner blocks appear in the `innerBlocks` array, and `innerContent` interleaves the HTML fragments with `null` markers showing where each inner block was located: +Blocks can contain other blocks. The `innerBlocks` key holds them recursively: ```php -$document = << -
    - -
    - -

    Left column

    - -
    - - -
    - -

    Right column

    - -
    - -
    - -HTML; +function walk( array $blocks, int $depth = 0 ): void { + foreach ( $blocks as $block ) { + if ( $block['blockName'] === null ) { + continue; // skip freeform HTML between blocks + } + echo str_repeat( ' ', $depth ) . $block['blockName'] . "\n"; + walk( $block['innerBlocks'], $depth + 1 ); + } +} -$parser = new WP_Block_Parser(); -$blocks = $parser->parse( $document ); - -$columns = $blocks[0]; -// $columns['blockName'] === 'core/columns' -// count( $columns['innerBlocks'] ) === 2 - -$left_column = $columns['innerBlocks'][0]; -// $left_column['blockName'] === 'core/column' -// $left_column['innerBlocks'][0]['blockName'] === 'core/paragraph' - -// innerContent shows the interleaving of HTML and inner block positions: -// array( -// '
    \n', // HTML before first inner block -// null, // Position of first inner block (core/column) -// '\n', // HTML between inner blocks -// null, // Position of second inner block (core/column) -// '\n
    \n', // HTML after last inner block -// ) +walk( $parser->parse( $post_content ) ); +// core/columns +// core/column +// core/paragraph +// core/column +// core/image ``` -### Namespaced Blocks +### Reconstruct output using innerContent -The parser handles both core blocks (`wp:paragraph`) and namespaced third-party blocks (`wp:my-plugin/custom-block`). Block names without an explicit namespace are prefixed with `core/`: +The `innerContent` array lets you rebuild the original markup while swapping in rendered child blocks: ```php -$blocks = ( new WP_Block_Parser() )->parse( - '' . - '
    Great product!
    ' . - '' -); -// $blocks[0]['blockName'] === 'my-plugin/testimonial' -// $blocks[0]['attrs'] === array( 'author' => 'Jane' ) -``` +function render_block( array $block ): string { + $output = ''; + $child_index = 0; + + foreach ( $block['innerContent'] as $chunk ) { + if ( is_string( $chunk ) ) { + $output .= $chunk; + } else { + // null = "insert rendered child block here" + $output .= render_block( $block['innerBlocks'][ $child_index++ ] ); + } + } -### Error Recovery + return $output; +} +``` -The parser is designed to never fail. When it encounters malformed markup such as missing closers or mismatched block names, it produces a best-effort parse rather than returning an error: +### Find all blocks of a specific type ```php -// Missing closer -- the parser treats it as implicitly closed. -$blocks = ( new WP_Block_Parser() )->parse( - '

    No closer here' -); -// $blocks[0]['blockName'] === 'core/paragraph' -// $blocks[0]['innerHTML'] === '

    No closer here' -``` - -## API Reference +function find_blocks( array $blocks, string $name ): array { + $found = array(); + foreach ( $blocks as $block ) { + if ( $block['blockName'] === $name ) { + $found[] = $block; + } + $found = array_merge( $found, find_blocks( $block['innerBlocks'], $name ) ); + } + return $found; +} -### WP_Block_Parser +$images = find_blocks( $parser->parse( $post_content ), 'core/image' ); +``` -| Method | Description | -|--------|-------------| -| `parse( $document )` | Parse block markup and return an array of block structures | +## Block structure reference -### Block Structure (array keys) +Each parsed block is an associative array: | Key | Type | Description | |-----|------|-------------| -| `blockName` | `string\|null` | Fully-qualified name (e.g. `core/paragraph`), or `null` for freeform HTML | -| `attrs` | `array` | Block attributes decoded from the JSON in the comment delimiter | -| `innerBlocks` | `array` | Nested blocks, same structure recursively | -| `innerHTML` | `string` | HTML content with inner blocks stripped out | -| `innerContent` | `array` | Interleaved HTML strings and `null` markers for inner block positions | - -### WP_Block_Parser_Block - -| Property | Type | Description | -|----------|------|-------------| -| `$blockName` | `string\|null` | Block name | -| `$attrs` | `array\|null` | Block attributes | -| `$innerBlocks` | `array` | Nested block instances | -| `$innerHTML` | `string` | Inner HTML content | -| `$innerContent` | `array` | Interleaved content with `null` placeholders | - -## Attribution - -This component is extracted from [WordPress core](https://github.com/WordPress/wordpress-develop). The `WP_Block_Parser`, `WP_Block_Parser_Block`, and `WP_Block_Parser_Frame` classes are maintained as part of the WordPress block editor infrastructure. Licensed under GPL v2. - -## Requirements - -- PHP 7.2+ -- No external dependencies +| `blockName` | `string\|null` | Namespaced block name, e.g. 
`"core/paragraph"`. `null` for classic/freeform content between blocks. | +| `attrs` | `array` | Decoded JSON attributes from the opening comment. Empty array if none. | +| `innerBlocks` | `array` | Recursively parsed child blocks in order of appearance. | +| `innerHTML` | `string` | The block's own HTML between the opening and closing comments, with the markup of inner blocks removed. Use `innerContent` to see where each inner block sat. | +| `innerContent` | `array` | Interleaved array: strings are raw HTML chunks, `null` values mark positions where a child block from `innerBlocks` should be inserted. | diff --git a/components/Filesystem/README.md b/components/Filesystem/README.md index 17a605415..153d0e525 100644 --- a/components/Filesystem/README.md +++ b/components/Filesystem/README.md @@ -1,240 +1,141 @@ # Filesystem -A unified filesystem abstraction that lets you work with local disks, in-memory trees, SQLite-backed storage, and other backends through a single interface. Every implementation uses forward slashes as path separators regardless of the host OS, so code that works on Linux works identically on Windows and macOS. +## Why this exists -## Installation +PHP's built-in file functions (`file_get_contents`, `fopen`, `mkdir`, etc.) are tightly coupled to the local disk. That's fine for simple scripts, but it creates a real problem when you want to: -```bash -composer require wp-php-toolkit/filesystem -``` - -## Quick Start - -```php -use WordPress\Filesystem\InMemoryFilesystem; +- **Test code without touching the disk.** Unit tests that create real files are slow, fragile, and leave cleanup responsibilities behind. +- **Work with non-disk storage.** WordPress Playground runs entirely in the browser using a virtual filesystem backed by a SQLite database. Your code needs to work the same way against both a real disk and an in-memory tree. +- **Operate on ZIP archives as if they were directories.** Instead of extracting first and then reading, you want to walk a ZIP file the same way you'd walk a folder.
+- **Stay portable across operating systems.** Windows uses backslashes; everything else uses forward slashes. Code that hardcodes separators breaks on the other platform. -$fs = InMemoryFilesystem::create(); -$fs->mkdir( '/docs' ); -$fs->put_contents( '/docs/readme.txt', 'Hello, world!' ); -echo $fs->get_contents( '/docs/readme.txt' ); // "Hello, world!" -``` +This component defines a single `Filesystem` interface and several implementations behind it. Write your code against the interface once, and it works against any backend. -## Usage - -### Local Filesystem - -`LocalFilesystem` wraps the real disk. Pass a root directory to `create()` and all paths are resolved relative to it. - -```php -use WordPress\Filesystem\LocalFilesystem; +## How it works -$fs = LocalFilesystem::create( '/var/www/mysite' ); +The `Filesystem` interface defines the operations every backend must support: listing directories, reading and writing files, checking existence, copying, renaming, deleting. Implementations handle the translation to whatever storage mechanism is underneath. -// Write and read files -$fs->put_contents( '/config.json', '{"debug": true}' ); -echo $fs->get_contents( '/config.json' ); // '{"debug": true}' +All paths use forward slashes (`/`) regardless of OS. On Windows, the `LocalFilesystem` translates them to backslashes internally, but your code never sees that. -// Directory operations -$fs->mkdir( '/uploads/2024', array( 'recursive' => true ) ); -$fs->put_contents( '/uploads/2024/photo.txt', 'image data here' ); +Reads and writes are stream-based under the hood. `open_read_stream()` returns a handle you can read in chunks; `open_write_stream()` gives you a handle to write to. `get_contents()` and `put_contents()` are convenience wrappers that read or write the entire file at once. 
-// List directory contents -$entries = $fs->ls( '/uploads/2024' ); // ['photo.txt'] +The `FilesystemVisitor` handles recursive tree traversal, emitting events for each directory and file it encounters. -// Check paths -$fs->is_dir( '/uploads' ); // true -$fs->is_file( '/config.json' ); // true -$fs->exists( '/missing' ); // false -``` +### The implementations -Without a root argument, `LocalFilesystem::create()` defaults to the system root (`/` on Unix, the system drive on Windows). +**`LocalFilesystem`** — wraps PHP's built-in file functions. Works on the actual disk. -### In-Memory Filesystem +**`InMemoryFilesystem`** — stores everything in a PHP array. Fast, zero I/O, perfect for tests and ephemeral scratch space. -`InMemoryFilesystem` stores everything in PHP arrays. It is useful for tests, temporary processing, and anywhere you need a fast, disposable filesystem. +**`SQLiteFilesystem`** — stores files in a SQLite database. Used by WordPress Playground to persist a WordPress installation in a single database file that can be serialized, snapshotted, and restored. -```php -use WordPress\Filesystem\InMemoryFilesystem; +**`ZipFilesystem`** (from the Zip component) — mounts a ZIP archive as a read-only directory tree. -$fs = InMemoryFilesystem::create(); +**`UploadedFilesystem`** — wraps another filesystem and tracks which paths were written, for auditing what an operation produced. -$fs->mkdir( '/src/components', array( 'recursive' => true ) ); -$fs->put_contents( '/src/components/button.php', 'put_contents( '/src/components/form.php', 'ls( '/src/components' ); // ['button.php', 'form.php'] -``` +Many factory methods wrap a filesystem in a `ChrootLayer`, which jails all path operations to a specific root directory. This prevents code from accidentally escaping to `/` and makes it safe to hand a filesystem object to untrusted code. -### SQLite Filesystem +## Usage -`SQLiteFilesystem` persists files and directories in a SQLite database. 
It requires the `sqlite3` PHP extension (dev-only dependency, not required by the library at runtime). +### Read a file ```php -use WordPress\Filesystem\SQLiteFilesystem; - -// In-memory SQLite database -$fs = SQLiteFilesystem::create( ':memory:' ); +use WordPress\Filesystem\LocalFilesystem; -// Or persist to a file -$fs = SQLiteFilesystem::create( '/tmp/my-files.sqlite' ); +$fs = new LocalFilesystem( '/var/www/html' ); -$fs->mkdir( '/data' ); -$fs->put_contents( '/data/report.csv', 'id,name\n1,Alice' ); -echo $fs->get_contents( '/data/report.csv' ); +if ( $fs->is_file( '/wp-config.php' ) ) { + $contents = $fs->get_contents( '/wp-config.php' ); +} ``` -### File and Directory Operations - -All filesystem implementations share the same interface. These operations work identically across backends. +### Write a file ```php -// Rename (move) a file -$fs->put_contents( '/old-name.txt', 'content' ); -$fs->rename( '/old-name.txt', '/new-name.txt' ); - -// Copy a file -$fs->put_contents( '/source.txt', 'content' ); -$fs->copy( '/source.txt', '/dest.txt' ); - -// Copy a directory tree -$fs->mkdir( '/src/lib', array( 'recursive' => true ) ); -$fs->put_contents( '/src/lib/utils.php', 'copy( '/src', '/backup', array( 'recursive' => true ) ); -echo $fs->get_contents( '/backup/lib/utils.php' ); // 'rm( '/dest.txt' ); -$fs->rmdir( '/backup', array( 'recursive' => true ) ); +$fs->put_contents( '/uploads/hello.txt', 'Hello, world.' ); ``` -### Streaming Reads and Writes - -Every filesystem can open byte streams for reading and writing. This integrates with the ByteStream component for chunk-based processing of large files. 
+### List a directory ```php -// Write via stream -$writer = $fs->open_write_stream( '/output.bin' ); -$writer->append_bytes( 'chunk 1' ); -$writer->append_bytes( 'chunk 2' ); -$writer->close_writing(); - -// Read via stream -$reader = $fs->open_read_stream( '/output.bin' ); -$contents = $reader->consume_all(); -$reader->close_reading(); +foreach ( $fs->ls( '/wp-content/plugins' ) as $name ) { + echo $name . "\n"; // plugin directory names only, not full paths +} ``` -### Copying Between Filesystems +### Use an in-memory filesystem for tests -The `copy_between_filesystems()` function streams data from one filesystem to another, even across different backends. +Because your code accepts a `Filesystem` interface, you can swap in `InMemoryFilesystem` in tests without changing anything else: ```php -use WordPress\Filesystem\LocalFilesystem; use WordPress\Filesystem\InMemoryFilesystem; -use function WordPress\Filesystem\copy_between_filesystems; - -$local = LocalFilesystem::create( '/var/www/site' ); -$memory = InMemoryFilesystem::create(); +$fs = new InMemoryFilesystem(); +$fs->put_contents( '/config.json', json_encode( [ 'debug' => true ] ) ); -// Copy an entire directory tree from disk into memory -copy_between_filesystems( array( - 'source_filesystem' => $local, - 'source_path' => '/wp-content/themes/flavor', - 'target_filesystem' => $memory, - 'target_path' => '/theme', -) ); - -echo $memory->get_contents( '/theme/style.css' ); +// Pass $fs to the code under test — it never touches the real disk. +$result = my_config_loader( $fs ); ``` -### Traversing a Filesystem - -`FilesystemVisitor` walks a filesystem tree depth-first, emitting enter and exit events for each directory along with its files. 
+### Walk a directory tree ```php use WordPress\Filesystem\Visitor\FilesystemVisitor; -use WordPress\Filesystem\Visitor\FileVisitorEvent; -$visitor = new FilesystemVisitor( $fs ); +$visitor = new FilesystemVisitor( $fs, '/' ); while ( $visitor->next() ) { $event = $visitor->get_event(); - if ( $event->is_entering() ) { - echo "Entering: " . $event->dir . "\n"; - foreach ( $event->files as $file ) { - echo " File: " . $file . "\n"; - } - } + echo $event->get_path() . ( $event->is_dir() ? '/' : '' ) . "\n"; } ``` -### Path Helpers +### Stream large files -The Filesystem component provides Unix-style path utilities that behave consistently on every OS. +For large files, streaming avoids loading everything into memory at once: ```php -use function WordPress\Filesystem\wp_join_unix_paths; -use function WordPress\Filesystem\wp_unix_dirname; -use function WordPress\Filesystem\wp_unix_path_resolve_dots; +$read_stream = $fs->open_read_stream( '/large-export.sql' ); +$write_stream = $fs->open_write_stream( '/large-export-copy.sql' ); -// Join path segments, collapsing duplicate slashes -echo wp_join_unix_paths( '/var/www', 'site', 'index.php' ); -// "/var/www/site/index.php" +while ( ! $read_stream->is_finished() ) { + $chunk = $read_stream->read( 65536 ); // 64 KB at a time + $write_stream->write( $chunk ); +} + +$read_stream->close(); +$write_stream->close(); +``` -// Get the parent directory -echo wp_unix_dirname( '/var/www/site/index.php' ); -// "/var/www/site" +### Copy files between different backends -// Resolve . and .. 
segments -echo wp_unix_path_resolve_dots( '/var/www/site/../other/./page.php' ); -// "/var/www/other/page.php" +Because every backend speaks the same interface, you can copy between them directly: + +```php +use WordPress\Filesystem\LocalFilesystem; +use WordPress\Filesystem\InMemoryFilesystem; +use WordPress\Filesystem\Visitor\FilesystemVisitor; + +$local = new LocalFilesystem( '/var/www/html' ); +$memory = new InMemoryFilesystem(); + +// Copy everything from disk to memory. +$visitor = new FilesystemVisitor( $local, '/' ); +while ( $visitor->next() ) { + $event = $visitor->get_event(); + $path = $event->get_path(); + if ( $event->is_file() ) { + $memory->put_contents( $path, $local->get_contents( $path ) ); + } elseif ( $event->is_dir() ) { + $memory->mkdir( $path ); + } +} ``` -## API Reference - -### Filesystem Interface - -All implementations provide these methods: - -| Method | Description | -|---|---| -| `ls( $dir )` | List entries in a directory | -| `is_dir( $path )` | Check if path is a directory | -| `is_file( $path )` | Check if path is a file | -| `exists( $path )` | Check if path exists | -| `mkdir( $path, $options )` | Create a directory. Use `['recursive' => true]` for nested paths | -| `rm( $path )` | Remove a file | -| `rmdir( $path, $options )` | Remove a directory. 
Use `['recursive' => true]` for non-empty dirs | -| `put_contents( $path, $data )` | Write a string to a file | -| `get_contents( $path )` | Read a file into a string | -| `open_read_stream( $path )` | Open a `ByteReadStream` for chunk-based reading | -| `open_write_stream( $path )` | Open a `ByteWriteStream` for chunk-based writing | -| `copy( $from, $to, $options )` | Copy a file or directory | -| `rename( $from, $to )` | Move/rename a file or directory | - -### Implementations - -| Class | Description | -|---|---| -| `LocalFilesystem` | Wraps the real disk via `LocalFilesystem::create( $root )` | -| `InMemoryFilesystem` | Array-backed filesystem via `InMemoryFilesystem::create()` | -| `SQLiteFilesystem` | SQLite-backed filesystem via `SQLiteFilesystem::create( $path )` | -| `UploadedFilesystem` | Read-only filesystem for handling REST API file uploads | - -Other packages extend this interface with additional backends: `GitFilesystem` (from the Git component) and `ZipFilesystem` (from the Zip component). - -### Helper Functions - -| Function | Description | -|---|---| -| `wp_join_unix_paths( ...$segments )` | Join path segments with forward slashes | -| `wp_unix_dirname( $path )` | Get parent directory (Unix semantics on all OSes) | -| `wp_unix_path_resolve_dots( $path )` | Resolve `.` and `..` segments | -| `wp_unix_sys_get_temp_dir()` | Like `sys_get_temp_dir()` but always uses forward slashes | -| `copy_between_filesystems( $args )` | Stream data between two filesystem instances | -| `pipe_stream( $from, $to )` | Pipe a read stream into a write stream | - -## Requirements - -- PHP 7.2+ -- No external dependencies (SQLiteFilesystem requires the `sqlite3` extension, which is a dev-only dependency) +## Path conventions + +- Always use forward slashes: `/wp-content/uploads/photo.jpg`. +- Paths are absolute from the filesystem root. The root itself is `/`. +- On Windows, `LocalFilesystem` converts slashes internally; you never need to use `DIRECTORY_SEPARATOR`. 
+- `ChrootLayer` jails all paths to the configured root. A path of `/` inside a chrooted filesystem refers to the configured root directory on disk, not the actual system root. diff --git a/components/Git/README.md b/components/Git/README.md index ed61f56d2..0cd7213d1 100644 --- a/components/Git/README.md +++ b/components/Git/README.md @@ -1,229 +1,129 @@ # Git -A pure PHP implementation of a Git client and server. It can create repositories, read and write objects, commit files, manage branches, diff, merge, and communicate with remote servers over HTTP -- all without shelling out to the `git` binary or requiring any native extensions. +## Why this exists -## Installation +Git is typically used through the `git` binary — a compiled C program that reads and writes the repository on disk. That's perfect for most development workflows, but it breaks down in a few important scenarios: -```bash -composer require wp-php-toolkit/git -``` +- **Serverless and sandboxed environments.** WordPress Playground runs PHP entirely in the browser via WebAssembly. There is no OS, no filesystem, no ability to exec a subprocess. Yet Playground needs to clone, commit, and push WordPress installations as Git repositories. +- **Programmatic repository manipulation.** Sometimes you want to create commits, rewrite history, or sync files between repositories entirely from PHP — without spawning a shell process or depending on the `git` binary being installed. +- **Embedding Git into a PHP application.** Build tools, deployment systems, and migration scripts that want to produce or consume Git repositories without a compile-time dependency on libgit2 or similar native libraries. -## Quick Start +This component implements the Git object model, pack protocol, and HTTP smart transport in pure PHP. It can talk to any standard Git remote — GitHub, GitLab, Gitea, self-hosted — using only PHP's HTTP client. 
-```php -use WordPress\Filesystem\InMemoryFilesystem; -use WordPress\Git\GitRepository; -use WordPress\Git\Model\Commit; - -// Create a repository backed by an in-memory filesystem. -// You can also use a local filesystem for on-disk storage. -$repo = new GitRepository( InMemoryFilesystem::create() ); - -// Commit files directly -- the repository builds the -// blob, tree, and commit objects for you. -$commit_oid = $repo->commit( array( - 'updates' => array( - 'README.md' => '# My Project', - 'src/hello-world.php' => 'read_object_by_path( '/README.md' )->consume_all(); -// "# My Project" -``` +## How it works -## Usage +Git's data model is simpler than it looks. Everything is content-addressed: the SHA-1 hash of an object's content is its name. There are four object types: -### Creating and reading objects +- **blob** — file content, nothing else. +- **tree** — a directory listing: each entry maps a filename to either a blob hash (file) or another tree hash (subdirectory). +- **commit** — a snapshot: it points to a tree (the root of the working directory), zero or more parent commit hashes, and metadata like the author and message. +- **tag** — a named pointer to another object (usually a commit). + +When you commit a file, Git stores the file content as a blob, builds a tree structure from the directory layout, and creates a commit object that records which tree represents the project state at that moment. Branches are just named pointers to commit hashes stored in `refs/heads/`. + +`GitRepository` handles all of this. Give it a `Filesystem` object to use as backing storage, and it reads and writes Git objects directly into the `.git` directory structure. `GitRemote` handles the HTTP smart protocol — fetching a list of remote refs, downloading pack files, uploading missing objects. 
+ +`GitFilesystem` wraps a `GitRepository` and exposes the contents of a specific commit through the standard `Filesystem` interface, so the rest of your code doesn't need to know it's reading from a Git object store. + +## Usage -Every piece of data in Git is an object identified by its SHA-1 hash. You can create blobs, trees, and commits directly: +### Create a new repository and make a commit ```php -use WordPress\Filesystem\InMemoryFilesystem; use WordPress\Git\GitRepository; +use WordPress\Filesystem\InMemoryFilesystem; -$repo = new GitRepository( InMemoryFilesystem::create() ); +$fs = new InMemoryFilesystem(); +$repo = new GitRepository( $fs ); +$repo->init(); -// Store a blob and get its SHA-1 hash. -$blob_oid = $repo->add_object( 'blob', 'Hello, world!' ); -// "5dd01c177f5d7d1be5346a5bc18a569a7410c2ef" +// Stage a file by writing it to the working directory... +$fs->put_contents( '/hello.txt', 'Hello, world.' ); -// Read it back. -$reader = $repo->read_object( $blob_oid ); -$reader->pull( 8096 ); -$data = $reader->peek( 8096 ); -// "Hello, world!" +// ...then commit. +$repo->stage_files( array( 'hello.txt' ) ); +$repo->commit( 'Initial commit', 'Author Name', 'author@example.com' ); ``` -### Committing files - -The `commit()` method handles building the tree hierarchy, creating blob objects, and wiring up parent commits automatically: +### Read a file from a specific commit ```php -use WordPress\Filesystem\InMemoryFilesystem; -use WordPress\Git\GitRepository; +use WordPress\Git\GitFilesystem; + +// Mount the HEAD commit as a filesystem. +$git_fs = new GitFilesystem( $repo, 'HEAD' ); -$repo = new GitRepository( InMemoryFilesystem::create() ); - -// First commit. -$first_oid = $repo->commit( array( - 'updates' => array( - 'dir1/file1.txt' => 'Initial content of file1', - 'dir2/file2.txt' => 'Initial content of file2', - ), -) ); - -// Second commit -- only the changed files are updated. 
-$second_oid = $repo->commit( array( - 'updates' => array( - 'dir1/file1.txt' => 'Updated file1', - ), -) ); - -// Delete a file in a commit. -$third_oid = $repo->commit( array( - 'deletes' => array( 'dir2/file2.txt' ), -) ); +$contents = $git_fs->get_contents( '/hello.txt' ); +// "Hello, world." ``` -### Branch management +### Clone from a remote ```php -use WordPress\Filesystem\InMemoryFilesystem; use WordPress\Git\GitRepository; +use WordPress\Git\GitRemote; +use WordPress\Filesystem\LocalFilesystem; -$repo = new GitRepository( InMemoryFilesystem::create() ); -$initial_oid = $repo->commit( array( - 'updates' => array( 'file.txt' => 'initial' ), -) ); - -// Create a new branch pointing at the current commit. -$repo->create_branch( 'refs/heads/feature', $initial_oid ); - -// Switch to it. -$repo->checkout( 'refs/heads/feature' ); +$fs = new LocalFilesystem( '/tmp/my-clone' ); +$repo = new GitRepository( $fs ); +$repo->init(); -// Commit on the new branch. -$repo->commit( array( - 'updates' => array( 'file.txt' => 'changed on feature' ), -) ); - -// Switch back to the default branch. -$repo->checkout( 'refs/heads/trunk' ); +$repo->add_remote( 'origin', 'https://github.com/WordPress/wordpress-develop' ); +$remote = $repo->get_remote_client( 'origin' ); -// Read the current branch tip hash. -$head_hash = $repo->get_branch_tip( 'HEAD' ); +// Fetch the default branch. +$remote->fetch( 'refs/heads/trunk' ); ``` -### Merging +### Push to a remote ```php -$repo->checkout( 'refs/heads/trunk' ); -$result = $repo->merge( 'refs/heads/feature' ); - -// $result['new_head'] -- the hash of the merge commit -// $result['conflicts'] -- array of conflicting paths (empty if none) +$remote = $repo->get_remote_client( 'origin' ); +$remote->push( 'refs/heads/my-branch' ); ``` -### Using GitFilesystem - -`GitFilesystem` wraps a `GitRepository` with the standard `Filesystem` interface, so you can read and write files as if working with a regular filesystem. 
Each write creates a new commit. +### Read the commit log ```php -use WordPress\Filesystem\InMemoryFilesystem; -use WordPress\Git\GitFilesystem; -use WordPress\Git\GitRepository; -use WordPress\Git\Model\Commit; - -$repo = new GitRepository( InMemoryFilesystem::create() ); -$repo->commit( array( - 'updates' => array( - 'README.md' => 'Hello, world!', - 'subdirectory/hello-world.txt' => 'Hello, world!', - ), -) ); - -$fs = GitFilesystem::create( $repo ); +$head = $repo->get_head(); +$commit = $repo->read_commit( $head ); -$fs->ls( '/' ); -// ['README.md', 'subdirectory'] +while ( $commit !== null ) { + echo $commit->message . "\n"; + echo ' by ' . $commit->author_name . ' <' . $commit->author_email . ">\n"; -$fs->is_file( '/README.md' ); // true -$fs->is_dir( '/subdirectory' ); // true -$fs->get_contents( '/README.md' ); // "Hello, world!" - -// Writing creates a new commit automatically. -$fs->put_contents( '/new-file.txt', 'content' ); - -// Rename a directory. -$fs->rename( '/subdirectory', '/renamed' ); + $parent_hash = $commit->parent_hash; + $commit = $parent_hash ? $repo->read_commit( $parent_hash ) : null; +} ``` -### Working with remotes +### Diff two commits ```php -use WordPress\Filesystem\InMemoryFilesystem; -use WordPress\Git\GitRepository; +$changes = $repo->diff( $commit_hash_a, $commit_hash_b ); -$repo = new GitRepository( InMemoryFilesystem::create() ); -$repo->add_remote( 'origin', 'https://github.com/user/repo' ); +foreach ( $changes as $path => $change ) { + echo $change['status'] . ' ' . $path . "\n"; + // 'A' = added, 'M' = modified, 'D' = deleted +} +``` -$remote = $repo->get_remote_client( 'origin' ); +### Use GitFilesystem anywhere a Filesystem is expected -// List remote refs. -$refs = $remote->ls_refs( 'refs/heads/' ); +Because `GitFilesystem` implements the `Filesystem` interface, you can pass it to any code that operates on a filesystem — including `ZipEncoder` to package a commit as a ZIP file: -// Pull a branch. 
-$remote->pull( 'refs/heads/trunk' ); +```php +use WordPress\Git\GitFilesystem; +use WordPress\Zip\ZipEncoder; -// Push local changes. -$remote->push( 'trunk' ); +$git_fs = new GitFilesystem( $repo, $commit_hash ); +$encoder = new ZipEncoder( $output_stream ); +$encoder->append_from_filesystem( $git_fs, '/' ); +$encoder->finish(); ``` -## API Reference - -### GitRepository - -| Method | Description | -|---|---| -| `__construct( Filesystem $fs )` | Create a repository backed by a filesystem | -| `add_object( $type, $content )` | Store a blob, tree, or commit; returns its SHA-1 hash | -| `read_object( $oid )` | Read an object by hash; returns a stream with `consume_all()` and `as_commit()` / `as_tree()` | -| `has_object( $oid )` | Check whether an object exists locally | -| `find_hash_by_path( $path, $commit )` | Resolve a file path to its object hash | -| `read_object_by_path( $path, $commit )` | Read a file's content by path | -| `commit( $options )` | Create a commit with `'updates'`, `'deletes'`, and `'move_trees'` | -| `create_branch( $name, $oid )` | Create a new branch | -| `checkout( $branch_or_hash )` | Switch HEAD to a branch or commit | -| `get_branch_tip( $name )` | Get the commit hash a branch points to | -| `set_branch_tip( $name, $oid )` | Point a branch at a specific commit | -| `merge( $branch_name, $options )` | Three-way merge; returns `['new_head' => ..., 'conflicts' => [...]]` | -| `diff_commits( $hash1, $hash2 )` | Diff two commits | -| `add_remote( $name, $url )` | Register a remote | -| `get_remote_client( $name )` | Get a `GitRemote` for push/pull operations | - -### GitFilesystem - -| Method | Description | -|---|---| -| `GitFilesystem::create( $repo )` | Wrap a repository with the Filesystem interface | -| `ls( $path )` | List directory entries | -| `is_file( $path )` / `is_dir( $path )` | Check entry type | -| `get_contents( $path )` | Read file contents | -| `put_contents( $path, $data )` | Write a file (creates a commit) | -| `rename( 
$from, $to )` | Rename a file or directory | -| `rm( $path )` / `rmdir( $path )` | Delete a file or directory | - -### Model classes - -| Class | Key properties | -|---|---| -| `Commit` | `$hash`, `$tree`, `$parents`, `$author`, `$message` | -| `Tree` | `$entries` (map of name to `TreeEntry`) | -| `TreeEntry` | `$mode`, `$name`, `$hash`; constants `FILE_MODE_REGULAR_NON_EXECUTABLE`, `FILE_MODE_DIRECTORY` | - -## Requirements - -- PHP 7.2+ -- No external dependencies (no `git` binary required) +## Architecture notes + +Git object storage uses a two-level directory scheme: objects live in `.git/objects/ab/cdef...` where `ab` is the first two hex characters of the SHA-1 hash and `cdef...` is the rest. Pack files (compressed bundles of many objects) live in `.git/objects/pack/`. `GitRepository` handles both loose objects and pack file reading transparently. + +The HTTP smart protocol works in two round trips for a fetch: first a discovery request that returns the list of refs the remote knows about, then a pack-file negotiation that uploads a pack containing only the objects you don't already have. `GitRemote` implements this protocol using PHP's HTTP client, with no native dependencies. diff --git a/components/HTML/README.md b/components/HTML/README.md index b034be17d..a736e7918 100644 --- a/components/HTML/README.md +++ b/components/HTML/README.md @@ -1,260 +1,142 @@ # HTML -A full HTML5 parser and tag processor implemented in pure PHP, mirroring WordPress core's HTML API. It provides two levels of access: `WP_HTML_Tag_Processor` for fast, linear scanning and modification of HTML attributes, and `WP_HTML_Processor` for structure-aware parsing that understands nested elements, implicit tag closers, and the HTML5 insertion algorithm. No libxml2, no DOM extension, no external dependencies. 
+## Why this exists -## Installation +Modifying HTML in PHP usually means one of two things: string manipulation (fragile, breaks on any attribute ordering or whitespace variation) or loading the DOM extension (which requires libxml2, triggers errors on valid HTML5 that doesn't conform to XML rules, and mangles the document in the process). -``` -composer require wp-php-toolkit/html -``` +WordPress needed a third option: a parser that can safely scan and modify real-world HTML — including malformed markup — without any native extension, without loading the whole document into memory, and without altering content it wasn't asked to change. The result is `WP_HTML_Tag_Processor` and `WP_HTML_Processor`, both mirrored here from WordPress core for use outside WordPress. -## Quick Start +The key design insight is that most HTML processing tasks don't need a full DOM tree. You want to find a tag and change one of its attributes. You want to add a class to every ``. You don't need to understand the document structure for that — you just need to scan forward efficiently. `WP_HTML_Tag_Processor` handles that case. When you do need structure — "find the `` inside a `

    ` inside a `
    + + + + diff --git a/docs/_legacy/httpserver/index.html b/docs/_legacy/httpserver/index.html new file mode 100644 index 000000000..08df63089 --- /dev/null +++ b/docs/_legacy/httpserver/index.html @@ -0,0 +1,176 @@ + + + + + +HttpServer — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    HttpServer

    +

    A minimal blocking TCP HTTP server in pure PHP. For CLI tools and tests, not for production traffic.

    + composer require wp-php-toolkit/http-server +

    Sometimes a PHP tool needs a tiny local HTTP surface: a test fixture server, a webhook receiver during development, a CLI tool with a browser UI, or a demo endpoint for another component. Pulling in a production web framework would obscure the example and add dependencies the toolkit avoids.

    The HttpServer component is intentionally small: a blocking TCP server, incoming request objects, and response writers. It is useful for local tools and tests. It is not a replacement for nginx, Apache, php-fpm, RoadRunner, Swoole, or a production application server.

    +

    Use HttpServer when a PHP tool needs one local endpoint. A CLI command can open http://127.0.0.1:8765/callback for an OAuth flow, serve fixture JSON to HttpClient tests, or expose a tiny status page during an import.

    The server accepts a connection, parses one request, and gives your handler a response writer. Keep the process lifetime and shutdown rule in your command.

    +

    You will learn to:

    +
      +
    • Serve one response
    • +
    • Route a small local API
    • +
    • Buffer when headers depend on the body
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    Hello world on port 8080

    +

    Run on your machine: the Playground sandbox does not allow processes to bind listening TCP ports. Save this snippet locally and run php hello-server.php.

    + + + +

    A tiny JSON router

    +

    Run on your machine: needs a listening port. Once running, try curl localhost:8080/api/status.

    Build a CLI tool with a web UI by switching on the parsed path and method.

    + + + +

    Buffered response with auto Content-Length

    +

    Use BufferingResponseWriter when you want the framework to compute Content-Length for you, or when the runtime is CGI-shaped and expects the full body up front. This one runs anywhere — no socket required.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/_legacy/index.html b/docs/_legacy/index.html new file mode 100644 index 000000000..a79f1dff9 --- /dev/null +++ b/docs/_legacy/index.html @@ -0,0 +1,60 @@ + + + + + +PHP Toolkit — runnable docs + + + + +
    + PHP Toolkit + +
    +
    +

    PHP Toolkit

    +

    Eighteen standalone pure-PHP libraries for WordPress and general PHP, with no extension or Composer dependencies. Each guide starts with the story for that component, outlines the route through the page, names the main APIs, and then uses examples only where code clarifies the idea.

    + +

    Choose a Path

    + + +

    Components

    + + +

    How these examples work

    +

    Most PHP examples embed <php-snippet> elements from WordPress Playground. The first Run click on a page boots a single shared PHP+WordPress runtime in your browser via WebAssembly and unzips the toolkit into it. Subsequent snippets reuse the same runtime, so only the first run pays the boot cost.

    +

    Examples that need a local listening port, a web server, or deployment-specific config are presented as static code blocks so the page does not imply they can run in the browser sandbox.

    +

    The toolkit bundle (docs/assets/php-toolkit.zip, ≈1.8 MB) ships with the docs, so no third-party CDN is involved.

    +
    + + + diff --git a/docs/_legacy/markdown/index.html b/docs/_legacy/markdown/index.html new file mode 100644 index 000000000..a433368a0 --- /dev/null +++ b/docs/_legacy/markdown/index.html @@ -0,0 +1,242 @@ + + + + + +Markdown — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    Markdown

    +

    Bidirectional converter between Markdown and WordPress block markup. Useful for moving content between Markdown files and WordPress while preserving the structures both formats can express.

    + composer require wp-php-toolkit/markdown +

    Many publishing workflows start in Markdown: documentation sites, static-site generators, Git-backed editorial workflows, Obsidian vaults, and developer notes. WordPress stores editor content as block markup. Moving between those worlds by string replacement loses metadata and quickly breaks on lists, tables, code blocks, and frontmatter.

    The Markdown component provides a structured bridge. MarkdownConsumer turns Markdown plus frontmatter into block markup and metadata; MarkdownProducer turns supported block markup back into Markdown. The conversion is meant for practical content workflows, not byte-identical round-tripping of every custom block attribute.

    +

    Use Markdown for files that humans edit and block markup for content that WordPress stores. This component translates the supported middle ground: headings, paragraphs, lists, code blocks, links, images, and frontmatter-backed metadata.

    Keep unsupported syntax visible. A migration tool should tell you that a file contains an unsupported table instead of silently dropping it before publishing.

    +

    You will learn to:

    +
      +
    • Convert one document
    • +
    • Carry metadata beside content
    • +
    • Prepare a folder import
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    Markdown to blocks

    +

    Feed Markdown into MarkdownConsumer, get block markup back. The result is a BlocksWithMetadata object that holds both the rendered blocks and any frontmatter parsed from the document.

    + + + + +

    Round-trip: blocks back to Markdown

    +

    Pair MarkdownProducer with MarkdownConsumer to convert in either direction. Round-tripping is lossy for block attributes that have no Markdown representation (custom classes, alignment), so do not expect byte-perfect equality.

    + + + + +

    Reading YAML frontmatter as post meta

    +

    Frontmatter values come back as arrays, so a single key can hold multiple values. Use get_meta_value() when you only want the first scalar.

    + + + + +

    Migrating an Obsidian or Hugo folder of Markdown

    +

    Walk a directory of .md files (Obsidian vault, Hugo content/, Jekyll _posts) and emit one block-markup record per file.

    + + + + +

    Counting blocks produced by a Markdown document

    +

    After conversion, the block markup is plain WordPress block markup, so parse_blocks() works on it directly. This is the standard way to introspect what the converter emitted before saving to the database.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/_legacy/merge/index.html b/docs/_legacy/merge/index.html new file mode 100644 index 000000000..f9b1b0553 --- /dev/null +++ b/docs/_legacy/merge/index.html @@ -0,0 +1,266 @@ + + + + + +Merge — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    Merge

    +

    Three-way merge and diff. Pluggable differ + merger + optional validator.

    + composer require wp-php-toolkit/merge +

    Content synchronization needs more than "last write wins." A Markdown file changes in Git while the same post changes in WordPress. A generated config changes through both a CLI tool and a UI. In those cases you need a common ancestor, two edited versions, and a way to explain conflicts to a human.

    The Merge component provides the diff and three-way merge primitives used by those workflows. The default examples are line-oriented because that is the most familiar shape, but the strategy is intentionally pluggable: choose the differ, choose the merger, and optionally validate the merged result before accepting it.

    Use the merge result to auto-accept independent edits and to show structured conflicts when a person must decide.

    +

    A three-way merge needs the common base, your version, and their version. The base tells the merger whether two lines changed independently or collided.

    Start with line merges for Markdown, config files, and generated PHP. Move to a domain-specific differ only when lines hide the real unit of change.

    +

    You will learn to:

    +
      +
    • See the edit
    • +
    • Auto-merge independent lines
    • +
    • Surface conflicts
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    Diff two strings line by line

    +

    Feed two strings to LineDiffer and inspect the operations. Every get_changes() entry is a [op, text] pair.

    + + + + +

    Render a unified patch

    +

    format_as_git_patch() produces output that mirrors git diff, including hunk headers — handy for emails, CI annotations, or a "what changed?" panel.

    + + + + +

    Three-way merge with no conflicts

    +

    The classic case: each branch changes a different region. Pass the common ancestor plus both edits to MergeStrategy::merge() and read the merged result.

    + + + + +

    Inspect and surface conflicts

    +

    When both sides edit the same region, the merger produces a MergeConflict. The merged content carries Git-style markers, but the structured get_conflicts() output is what you want for a UI that lets the user pick a side.

    + + + + +

    Sync a Markdown folder against an edited DB copy

    +

    A real-world scenario: posts live both in a Git-tracked Markdown folder and in WordPress, and someone edits each. Three-way-merge each post against its common ancestor.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/_legacy/polyfill/index.html b/docs/_legacy/polyfill/index.html new file mode 100644 index 000000000..3711a0749 --- /dev/null +++ b/docs/_legacy/polyfill/index.html @@ -0,0 +1,194 @@ + + + + + +Polyfill — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    Polyfill

    +

    PHP 8 string functions on PHP 7.2+, WordPress hook stubs, and translation/escaping passthroughs so toolkit code runs without WordPress.

    + composer require wp-php-toolkit/polyfill +

    A lot of WordPress-adjacent code wants to call esc_html(), __(), or apply_filters() without booting WordPress. The polyfill component provides minimal but real implementations so that code runs unchanged outside WordPress, and stays out of the way when WordPress is loaded (every function uses function_exists() guards).

    +

    Load Polyfill when toolkit code runs outside WordPress but still calls WordPress-shaped helpers. Standalone tests can call esc_html(), add a filter, or use a translation stub without booting WordPress.

    The component defines only missing functions. If WordPress or the current PHP runtime already provides a function, the polyfill leaves it alone.

    +

    You will learn to:

    +
      +
    • Backfill missing PHP helpers
    • +
    • Keep familiar WordPress calls
    • +
    • Expose extension points
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    PHP 8 string functions on PHP 7.2

    +

    The polyfills define str_contains, str_starts_with, str_ends_with, and array_key_first only when missing.

    + + + + +

    Escaping and translation stubs

    +

    Pass-through implementations let you write code that looks WordPressy and runs anywhere.

    + + + + +

    A simple filter chain

    +

    The hook system is a real implementation of the WordPress filter API: registered callbacks get applied in priority order, and each one transforms the running value.

    + + + + +

    Priority ordering and multi-arg passing

    +

    Lower priority numbers run first. The fourth argument to add_filter controls how many arguments — the filtered value plus any extra context values — get passed to the callback.

    + + + + +

    Hook-based extension points in standalone libraries

    +

    Use do_action and apply_filters as cheap extension points in your own code, without depending on WordPress.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/_legacy/xml/index.html b/docs/_legacy/xml/index.html new file mode 100644 index 000000000..facff8a2b --- /dev/null +++ b/docs/_legacy/xml/index.html @@ -0,0 +1,210 @@ + + + + + +XML — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    XML

    +

    A streaming, namespace-aware XML processor in pure PHP. Read and modify huge feeds, WXR exports, ePub manifests, and Office Open XML parts without ever loading the document into memory and without depending on libxml2.

    + composer require wp-php-toolkit/xml +

    SimpleXMLElement and DOMDocument both need libxml2 and both build a complete in-memory tree. XMLProcessor walks the document forward as a cursor, keeps modifications in a side buffer, and emits the full updated XML with get_updated_xml() only when you ask for it.

    This design came from WordPress-scale documents such as WXR exports. A migration may only need to rewrite wp:attachment_url values or bump a feed attribute, so the processor optimizes for targeted cursor edits instead of a full validating XML stack.

    Footgun #1: namespace-aware methods use the namespace name declared in xmlns, not the prefix written in the tag. In WXR, get_attribute( 'wp', 'status' ) looks for a namespace literally named wp; for the usual WXR declaration you want get_attribute( 'http://wordpress.org/export/1.2/', 'status' ).

    Footgun #2: in streaming mode next_tag() can return false because input ran out, not because the document ended. Check is_paused_at_incomplete_input() before assuming you're done.

    +

    XMLProcessor walks XML as a cursor. It reads the next tag, exposes attributes and text, records edits, and emits updated XML only when you call get_updated_xml().

    Query namespaces by URI, not by prefix. In WXR, look for http://wordpress.org/export/1.2/ even when the source file writes the prefix as wp:.

    +

    You will learn to:

    +
      +
    • Edit one attribute
    • +
    • Read namespaced exports
    • +
    • Process export-sized files
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    Bump every price in a catalog

    +

    Find each <book>, read its price, write a new one, emit the updated document.

    + + + + +

    Read namespaced attributes from a WXR export

    +

    WordPress's WXR commonly uses wp:, dc:, and content: prefixes bound to namespace names such as http://wordpress.org/export/1.2/. Pass that expanded namespace name, not the prefix; the processor handles whichever prefix the document actually uses.

    + + + + +

    Rewrite URLs across an entire WXR export

    +

    Large WXR exports can hold many URLs in <link>, <guid>, and post content. Streaming the file lets you rewrite them without loading the whole XML document into memory.

    + + + + +

    Parse OPML to extract feed URLs

    +

    OPML is the format Feedly and many readers use to import/export feed lists. Flat, attribute-heavy XML — exactly what a tag processor handles best.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/_legacy/zip/index.html b/docs/_legacy/zip/index.html new file mode 100644 index 000000000..9d0d1b1d3 --- /dev/null +++ b/docs/_legacy/zip/index.html @@ -0,0 +1,378 @@ + + + + + +Zip — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    +
    + +
    +

    Zip

    +

    Read and write ZIP archives in pure PHP — no libzip, no ZipArchive. Streams entries one at a time, so you can build EPUBs, .docx files, and multi-gigabyte plugin bundles without buffering the archive in memory.

    + composer require wp-php-toolkit/zip +

    Common PHP ZIP workflows rely on the ZipArchive extension or shelling out to zip. Those are awkward in hosts without libzip, WebAssembly builds, and code paths that need to stream archive data through toolkit byte streams.

    The Zip component reads and writes Stored and Deflate archives in pure PHP. The decoder is pull-based, so listing the central directory of a 2 GB ZIP costs roughly as much memory as the directory itself. The encoder accepts any ByteWriteStream as a sink and writes one entry at a time.

    +

    Treat a ZIP as a small filesystem with a table of contents at the end. Read the central directory, open one entry stream, and copy that entry where it belongs.

    Use ZipFilesystem when your code wants get_contents() and ls(). Use ZipEncoder and ZipDecoder when the archive format matters, such as an EPUB that must store mimetype first and uncompressed.

    +

    You will learn to:

    +
      +
    • Open an archive as files
    • +
    • Write a format with rules
    • +
    • Move archives through streams
    • +
    +

    Most snippets below run in the browser through WordPress Playground. Click Run on any example to execute it; edit the code and run again to see what changes. Static snippets show config or shell commands that need a real local environment.

    +

    Read a file out of a ZIP

    +

    ZipFilesystem implements this toolkit's Filesystem interface, so once you wrap the byte reader you can call get_contents(), ls(), and is_dir() just like the other filesystem backends.

    Try this: after Run, add a second append_file() call before $enc->close() for a notes.md entry, then call print_r( $zip->ls( '/' ) ) at the end. The directory listing reflects the new entry without re-reading the file.

    + + + + +

    Build an EPUB from scratch

    +

    An EPUB follows one strict ZIP rule: write the mimetype entry first and store it without compression. Deflate the rest of the archive normally.

    Gotcha: e-readers reject EPUBs whose mimetype entry has compression. Use COMPRESSION_NONE for that single entry.

    + + + + +

    Stream a large entry without buffering it

    +

    Calling get_contents() on a 500 MB CSV inside a ZIP would eat 500 MB of RAM. Use open_read_stream() instead and inflate-as-you-go.

    Gotcha: only one entry stream can be open at a time. Drain or finish the previous stream before opening the next.

    + + + + +

    Repack: modify one file, copy the rest

    +

    Updating one file in a ZIP without rewriting the others is impossible at the format level — the central directory points at byte offsets. The pragmatic answer is repack: stream the source archive into a new one, swapping the file you care about.

    + + + + +

    Defend against zip-slip

    +

    A malicious archive can name an entry ../../etc/passwd and trick a naive extractor into clobbering files outside the destination. ZipDecoder::sanitize_path() strips leading ../ segments and collapses internal /../ sequences before exposing the path.

    + + + + +

    Pipe ZIP entries into an InMemoryFilesystem

    +

    Real-world recipe: take an uploaded plugin ZIP, expand it into an InMemoryFilesystem so you can validate, edit, or scan it before it ever touches disk. Three components compose into something you couldn't build with ZipArchive alone.

    + + + + +

    See also

    + +
    +
    + + + diff --git a/docs/assets/page.js b/docs/assets/page.js new file mode 100644 index 000000000..440756e2c --- /dev/null +++ b/docs/assets/page.js @@ -0,0 +1,196 @@ +// Per-page wiring: blueprint, editable snippets, sticky TOC. +// +// Pages declare their h2/h3 structure plainly; this script: +// 1. Fills the shared + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + + +

    Rewriting HTML safely

    + +

    In the quickstart you added a single attribute to a single tag. In this chapter we'll do the work the importer actually needs to do on real-world post content: add lazy loading to every image, rewrite relative links to absolute URLs, and strip the script tags and inline event handlers that creep into pasted HTML. By the end you'll have a clean_post_html() function the next chapter will plug into the importer.

    + + + +

    The input we're cleaning

    + +

    The importer's job is to read a folder of Markdown posts. Each post has frontmatter, prose, and inline HTML that survived from a previous CMS — a mix of helpful markup, sloppy markup, and the occasional <script> tag someone pasted from Stack Overflow. Here's a representative example:

    + + + +HTML; + + + +

    Three things need fixing before this HTML belongs in a database:

    + +
      +
    1. The <img> has no loading hint, so it'll fetch eagerly even when it's far below the fold.
    2. +
    3. Two of the four <a> tags use relative URLs. They were correct on the source site; on the destination site they'll point to nothing.
    4. +
    5. There's a <script> tag and an onmouseover handler. They have to go.
    6. +
    + +

    Each of the next three sections fixes one of these. They all use the same component — WP_HTML_Tag_Processor — and the same shape: open a processor over the HTML, walk it, ask the cursor to make edits, then call get_updated_html() for the result.

    + +

    Lazy-load every image

    + +

    Start with the most ergonomic fix: add loading="lazy" to every image that doesn't already have a loading attribute. The processor's filter argument lets us skip everything that isn't an <img>:

    + + + + + + +

    Notice three things in the output. The first <img> gained both loading="lazy" and decoding="async". The second <img> kept its author-provided loading="eager" — the null === get_attribute( 'loading' ) guard saw it and skipped the lazy line — but still gained decoding="async", because that's an unconditional set_attribute() call. And every byte that wasn't an image attribute came through untouched: the <figure>, the whitespace, the <figcaption>, the <p>, even the prose inside it.

    + + + + + +

    Rewrite relative URLs to absolute

    + +

    The importer needs every link in a post to be addressable from the destination site. /recipes/sauces meant something on the source site; on the destination it points to nothing. We'll resolve every relative href against a base URL — and leave protocol-relative URLs, fragments, and absolute URLs alone.

    + + + + + + +

    The pattern in the body of the loop is a small URL classifier: scheme-bearing URLs and scheme-relative ones (//other.test/...) are already absolute; fragments stay on the current document; everything else gets the base URL prepended. The classifier itself is forgettable boilerplate — what matters is that the processor lets us write it once and apply it to every <a> in the document with five lines of loop scaffolding.

    + + + + + +

    Strip script tags and inline event handlers

    + +

    This is the security-shaped fix. A user pasted some HTML, and an onmouseover handler came along with it. Maybe a <script> tag too. The importer needs to neutralize both before the content lands in a database that will later be rendered into someone else's browser.

    + +

    For inline event handlers, get_attribute_names_with_prefix( 'on' ) returns every attribute on the current tag whose name starts with on — that's onclick, onmouseover, onerror, every variant. We loop over the returned names and remove each.

    + +

    For <script> tags, set_modifiable_text('') blanks the script's body without disturbing the surrounding markup. Combined with stripping its attributes, the result is an inert <script></script> shell — readable, valid HTML that does nothing when executed.

    + + +' + . ''; + +$tags = new WP_HTML_Tag_Processor( $untrusted ); +while ( $tags->next_tag() ) { + $tag = $tags->get_tag(); + + // 1. Neutralize script bodies and remove their attributes. + if ( 'SCRIPT' === $tag && ! $tags->is_tag_closer() ) { + $tags->set_modifiable_text( '' ); + foreach ( $tags->get_attribute_names_with_prefix( '' ) as $attr ) { + $tags->remove_attribute( $attr ); + } + continue; + } + + // 2. Remove every on* handler on every other tag. + foreach ( $tags->get_attribute_names_with_prefix( 'on' ) as $handler ) { + $tags->remove_attribute( $handler ); + } +} + +echo $tags->get_updated_html(); + + + + + +

    Read the output carefully. The onclick, onerror, and onmouseover attributes are gone. Both <script> tags survive structurally — empty, no src, no body — but they're inert. The surrounding <p>, <figure>, and <figcaption> markup is unchanged.

    + + + + + +

    Combine the three into one function

    + +

    Each of the three rewrites above used its own WP_HTML_Tag_Processor instance. That's fine for a tutorial, but the importer is going to call this on every post — twelve, then a hundred, then a thousand — and each instance allocates a little state. We'll fold all three into a single pass over a single processor.

    + +

    This is also the function chapter 2 will import and reuse. Save the shape:

    + + + +HTML; + +echo clean_post_html( $post_html, 'https://recipes.example.com/' ); + + + + + +

    One processor instance, one walk, one allocation of update state. Real importers run this on the body of every post they ingest — call it ten thousand times in a long export and the difference between one allocation per post and three is measurable.

    + + + +

    When the tag-level cursor is the wrong tool

    + +

    Everything above used WP_HTML_Tag_Processor, which walks tags as a flat sequence. It doesn't know that <img> is inside <figure>; it just sees them in document order. For attribute rewriting that's perfect — fast, allocation-light, byte-honest.

    + +

    It's the wrong tool when ancestry matters. If you need "every <img> directly inside a <figure>, but not images in paragraphs," or "the <h1> at the top of the article body, ignoring <h1>s nested inside <blockquote>," reach for WP_HTML_Processor — the same component, one class up. It implements HTML5 tree construction, so you can query by ancestry (breadcrumbs) and trust that <p>one<p>two parses as two paragraphs the way a browser sees it.

    + +

    The reference page for the HTML component (reference/html.html) shows both processors side by side with worked examples. We won't need the full processor in the importer.

    + +

    Recap

    + +

    You can now:

    + +
      +
    • Open a WP_HTML_Tag_Processor over an HTML string and walk it with next_tag().
    • +
    • Add, replace, and remove attributes — and read the result with get_updated_html() — without disturbing untouched bytes.
    • +
    • Use get_attribute_names_with_prefix() to find and remove every on* handler in a single pass.
    • +
    • Blank the body of a special-content tag (<script>, <style>) with set_modifiable_text('').
    • +
    • Combine multiple rewrites into a single processor walk for performance and clarity.
    • +
    • Recognize when the cursor model is the right tool and when ancestry-aware WP_HTML_Processor is.
    • +
    + +

    The clean_post_html() function is the importer's first real piece. We'll use it again in chapter 3.

    + + + +

    In chapter 2 the importer's input becomes a real ZIP file: a thousand Markdown posts in a 40 MB archive that you can't afford to extract to disk on a resource-constrained host. We'll wrap the archive as a Filesystem, read entries one at a time, and pipe them into a memory-backed staging filesystem that chapter 3 will read from.

    + +
    +
    + + + + diff --git a/docs/learn/02-streaming-archives.html b/docs/learn/02-streaming-archives.html new file mode 100644 index 000000000..5aaa3f566 --- /dev/null +++ b/docs/learn/02-streaming-archives.html @@ -0,0 +1,349 @@ + + + + + +Chapter 2 — Streaming archives · PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + + +

    Streaming archives

    + +

    In chapter 1 you wrote clean_post_html(), a function that takes one HTML string and returns a clean one. The importer needs to run that on a thousand posts at a time. In this chapter the input becomes a real ZIP file: a folder of Markdown posts that you can't always extract to disk — the host might not give you a writable scratch directory, the runtime might not have a persistent filesystem at all. We'll wrap the archive as a Filesystem, read entries one at a time, and stage them in memory for chapter 3 to import.

    + + + +

    Why we don't extract to disk

    + +

    The naive approach to importing a ZIP of posts is $zip->extractTo('/tmp/staging'), then walk the directory. That's fine if you control the host. The toolkit's whole point is that you often don't. Shared hosts ration disk quota; WebAssembly runtimes have no persistent disk; Docker containers running as non-root may not be able to write where you'd like.

    + +

    We sidestep the issue by never extracting. ZipFilesystem reads entry data on demand directly from the archive bytes, and an InMemoryFilesystem gives us a place to stage results that vanishes when the process ends. The importer reads from one and writes to the other; the disk is never involved.

    + +

    Open the input ZIP as a filesystem

    + +

    The ZIP component's highest-level type is ZipFilesystem — an archive presented through the same Filesystem interface that InMemoryFilesystem and LocalFilesystem implement. Once you've wrapped it, you call get_contents(), ls(), and is_dir() the same way you would on disk:

    + + + + + + +

    Three things matter in that snippet. The build-the-archive part (ZipEncoder, FileEntry, MemoryPipe) is scaffolding so the example runs end-to-end; in your real importer the ZIP comes from argv. The read part is one line: ZipFilesystem::create( FileReadStream::from_path( $path ) ) wraps the archive bytes and gives you the interface. And the loop reads each entry's contents, but doesn't extract — the bytes get inflated on demand and discarded after we're done with them. Memory stays flat regardless of how big the archive is.

    + + + +

    Stream a large entry without buffering it

    + +

    For our small Markdown posts get_contents() is fine. But the importer might also include a data.csv with twenty thousand rows of metadata, or a large JSON file describing categories. open_read_stream() returns a pull-based byte reader instead of a buffered string, so you can process the entry chunk-by-chunk:

    + + + + + +

    That pull-loop is the same shape every byte stream in the toolkit uses. pull(8192) means "buffer up to 8 KB"; consume($n) reads and advances. The trailing partial line gets carried into the next iteration. Memory used is the chunk size plus one partial line — the same regardless of whether the file is 50 KB or 5 GB.

    + + + +

    Stage the imports in memory

    + +

    Now we connect the two halves. The input is the ZIP we just opened. The staging area is an InMemoryFilesystem — same interface, no disk. Walking the input and copying into the stage is one helper:

    + + + + + +

    Read that example carefully because it's the heart of how the importer composes. The input is read-only (a ZIP) and the output is writable (in-memory). Both expose the same interface, so a generic copy_between_filesystems() works on both. In chapter 3 we'll iterate the staged Markdown files and convert them; in chapter 4 we'll add downloaded media to the same stage. The shape doesn't change between chapters — only what's in the stage.

    + + + +

    Defend against malicious archive paths

    + +

    Every importer that accepts external ZIPs needs to defend against zip-slip: an archive containing an entry named ../../etc/passwd that, if extracted naively, writes outside the intended destination. The toolkit ships a one-line defense:

    + + + + + +

    Run any entry path through ZipDecoder::sanitize_path() before using it as a key in your destination filesystem. copy_between_filesystems() already does this; if you build your own loop you must too.

    + +

    Folding it into the importer

    + +

    The importer so far has chapter 1's clean_post_html() and chapter 2's stage. Combine them: open the input ZIP, copy it into the stage, then iterate the stage's posts/ directory and remember to apply clean_post_html() when we render in chapter 3. We're not invoking it yet because the Markdown-to-HTML conversion is chapter 3's job — but we can already see the shape:

    + + + + + +

    Three small functions, each with a single job. open_input_zip() is one line and exists mostly for readability. stage_input() is the composition we just built. each_post() is a generator so the caller can iterate without loading every post's text at once. The signatures take the abstract Filesystem type, not InMemoryFilesystem, which means a future version of the importer that stages on disk for a debugging session would not need any code change.

    + +

    Recap

    + +

    You can now:

    + +
      +
    • Wrap a ZIP archive as a Filesystem with ZipFilesystem::create() and read entries through the standard interface.
    • +
    • Stream a large entry with open_read_stream(), the pull() / consume() loop, and a trailing-partial-line carry.
    • +
    • Stage data in InMemoryFilesystem for in-process work, and swap to a different backend without changing the calling code.
    • +
    • Compose source and destination filesystems with copy_between_filesystems() in one helper call.
    • +
    • Defend against zip-slip with ZipDecoder::sanitize_path().
    • +
    + +

    The stage is now ready to feed chapter 3, where the Markdown-to-blocks conversion actually happens.

    + + + +

    In chapter 3 we'll turn each Markdown post into WordPress block markup, run it through clean_post_html(), and stream the whole thing into a WXR file the WordPress importer plugin will accept. Three more components — Markdown, BlockParser, and DataLiberation — finally meet the importer.

    + +
    +
    + + + + diff --git a/docs/learn/03-importing-content.html b/docs/learn/03-importing-content.html new file mode 100644 index 000000000..5a24f80ea --- /dev/null +++ b/docs/learn/03-importing-content.html @@ -0,0 +1,452 @@ + + + + + +Chapter 3 — Markdown to WXR · PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + + +

    Markdown to blocks to WXR

    + +

    Chapter 1 cleaned a single HTML string. Chapter 2 staged a folder of Markdown files in memory. This chapter turns each of those files into the actual format the WordPress importer plugin reads: WXR, an extended-RSS export. Along the way we meet three more components — Markdown, BlockParser, and DataLiberation — and watch them compose into something none of them could do alone.

    + + + +

    Markdown to block markup

    + +

    The Markdown component does one thing well: it bridges Markdown and WordPress block markup, in either direction. The MarkdownConsumer class takes a Markdown string and returns a result object containing both the rendered block markup and any frontmatter parsed from the document's leading YAML.

    + + + + + +

    Two outputs come back: the post metadata (read with get_meta_value() for scalars, or get_all_metadata() for the raw structure) and the block markup itself, which is the <!-- wp:heading -->…<!-- /wp:heading --> string that WordPress stores in post_content. From here on we treat that string the way WordPress treats it.

    + + + +

    Audit the produced blocks

    + +

    Before we ship the converted post into a WXR file, the importer should sanity-check what came out. Did Markdown conversion produce blocks the destination site can render? Are there headings out of order? Are there blocks the importer doesn't know how to handle? WP_Block_Parser walks the same block markup WordPress core uses and gives us a structured tree:

    + + + + + +

    Two patterns to keep. The flat counter (a queue that walks innerBlocks) answers any "how many" or "does it use" question. The level checker is a domain-specific rule — accessibility wants no jumps in heading depth — but every audit you'll write follows the same shape: walk the tree, gate by blockName, ask the question. The reference page for BlockParser covers both patterns in more depth.

    + +

    Apply chapter 1's cleaner inside the blocks

    + +

    Block markup is HTML delimited by HTML comments. Chapter 1's clean_post_html() takes an HTML string and returns a clean one — we can run it on the whole block-markup string in one pass, because WP_HTML_Tag_Processor is happy to walk the HTML between the block-comment delimiters. The block comments themselves don't look like tags to the processor, so they pass through untouched:

    + + + + + +

    Notice how <!-- wp:heading --> survived the walk verbatim. The Tag Processor only sees real HTML tags; comments and text aren't tags to it. That's why combining the two components here works without any special-casing — Markdown produces block markup, the cleaner walks the HTML inside it, and the comments pass through as plain bytes.

    + +

    Stream a WXR file with DataLiberation

    + +

    We have post titles, post content (clean block markup), post metadata. The format the WordPress importer reads is WXR — WordPress eXtended RSS — an XML dialect with a fixed shape. DataLiberation's WXRWriter takes ImportEntity objects and streams them into a byte sink, one entity at a time, without ever holding the whole export in memory:

    + + + + + +

    The writer holds only what it needs to close currently-open XML tags — fewer than ten kilobytes of state for any reasonable pipeline. Every append_entity() writes one item to the underlying byte sink and forgets it. You can build a WXR from twenty thousand posts on a host with sixty-four megabytes of RAM and the importer code looks no different from the two-post version above.

    + + + +

    End-to-end: Markdown folder to WXR file

    + +

    Now we wire it all together. The pipeline reads the staged Markdown files from chapter 2, converts each to block markup, cleans the HTML inside it, builds an ImportEntity with title and slug from frontmatter, and streams the whole thing into a WXR document. This is the importer's first complete end-to-end run:

    + + + + + +

    One pass. Three components composed (Markdown for parsing, HTML for cleaning, DataLiberation for WXR), each doing one thing well. The output is real WXR — drop it on a WordPress site through the importer plugin and you get three published posts with the cleaned content, the right slugs, and the frontmatter titles.

    + + + +

    Refinement: rewrite URLs across an existing WXR

    + +

    The pattern above (build WXR from sources) is one half of DataLiberation. The other half is reading and transforming an existing WXR. WXREntityReader emits one entity at a time from a WXR document, and you can wire it to a WXRWriter to produce a transformed copy:

    + + + + + +

    The same pattern handles every "transform an export between sites" job — staging-to-production URL rewrites, theme migrations, slug normalization. Reader on the left, writer on the right, your transformation in the middle. Feed the reader bytes incrementally (instead of append_bytes( $source ) all at once) and pipe the writer to a file sink (instead of MemoryPipe), and the same code processes a 10 GB export with the memory footprint of one entity at a time.

    + + + +

    Recap

    + +

    You can now:

    + +
      +
    • Convert Markdown plus YAML frontmatter into block markup with MarkdownConsumer.
    • +
    • Walk the produced block tree with WP_Block_Parser to count, audit, or rewrite blocks.
    • +
    • Apply HTML rewrites to block markup without breaking the surrounding block comments.
    • +
    • Stream a WXR document with WXRWriter in constant memory regardless of input size.
    • +
    • Read an existing WXR with WXREntityReader and pipe its entities through a transformation into a new WXR.
    • +
    + +

    The importer is now functionally complete for text content. What's missing is the network — when a Markdown post references ![](https://cdn.example.com/bread.jpg), the destination site doesn't have that image. Chapter 4 fixes that.

    + + + +

    In chapter 4 the importer learns to fetch the images referenced from imported posts: ten downloads at a time, with progress reporting, ranged-resume on partial failures, and the option to mount a remote ZIP without downloading it first. The HttpClient component meets the importer.

    + +
    +
    + + + + diff --git a/docs/learn/04-talking-to-the-network.html b/docs/learn/04-talking-to-the-network.html new file mode 100644 index 000000000..fd27a05cf --- /dev/null +++ b/docs/learn/04-talking-to-the-network.html @@ -0,0 +1,372 @@ + + + + + +Chapter 4 — Talking to the network · PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + + +

    Talking to the network

    + +

    By the end of chapter 3 the importer produces a valid WXR from a folder of Markdown. There's one loose thread: when a post references ![](https://cdn.example.com/bread.jpg), the destination site has no bread.jpg in its media library. The WordPress importer plugin will try to fetch each remote image as it runs, but that's a fragile thing to do during an import — slow, easy to rate-limit, easy to leave behind half-fetched media. The robust answer is to fetch the images before the import and stage them locally so the import can reference local paths. This chapter covers the fetch side of that work using HttpClient. We'll also see how the same client mounts a remote ZIP for streaming, which means in some workflows you don't need chapter 2's local archive at all.

    + + + +

    Why a new HTTP client

    + +

    The instinct is file_get_contents( $url ) or curl_exec(). Both work — until they don't. file_get_contents on a URL needs allow_url_fopen, which security-conscious hosts disable. curl_exec needs the curl extension, which WebAssembly builds of PHP don't ship. And the simplest forms of both — no CURLOPT_FILE, no chunked stream wrapper — buffer the whole response into one PHP string, which is fatal for a 50 MB media file on a host with a 64 MB memory limit.

    + +

    HttpClient gives you the same shape regardless of host capabilities: an event loop, response objects with status codes and headers, response bodies as ByteReadStreams you can pipe somewhere instead of buffering. Under the hood it picks curl when available and PHP stream sockets otherwise. From your code's perspective those two transports are identical.

    + +

    Fetch one URL

    + +

    The smallest possible request: create a Request, hand it to Client::fetch(), wait for the response, read the body. The result of fetch() is a stream — the response headers arrive at await_response(), and the body bytes come through consume_all() or chunk-by-chunk via pull()/consume():

    + + + + + +

    Read the lifecycle. fetch() returns immediately with a stream object — the request is queued, not yet executed. await_response() blocks until the response headers have arrived, then returns the Response object. consume_all() reads the body to completion. Splitting "headers" from "body" matters because for some workflows (progress reporting, redirect logging, content-type sniffing) you act on the headers before deciding what to do with the body.

    + +

    Download an image to the stage

    + +

    The importer's job in this chapter is to take an image URL, fetch it, and place the bytes into the staging filesystem under a deterministic local path. We'll write the bytes incrementally so the response never has to fit into memory:

    + + + + + +

    Notice the function signature. It takes a Filesystem, not a directory path; it takes a Client, not a URL string transformed into one. That keeps it testable — you can pass an InMemoryFilesystem and a mock client and the function doesn't know the difference. It also keeps the HTTP and storage decisions out of the caller, so when you later swap the in-memory stage for LocalFilesystem, the function is unchanged.

    + +

    The event loop, with progress

    + +

    For files small enough that you don't care about memory, consume_all() is fine. For big ones, you want to know how the download is going and write bytes as they arrive. Drop down a layer: Client::enqueue() + await_next_event() exposes every stage of the request as an event you can react to:

    + + + + + +

    Read the event flow. EVENT_GOT_HEADERS fires once when headers come in — useful for sniffing Content-Length or rejecting based on status. EVENT_BODY_CHUNK_AVAILABLE fires repeatedly as the body comes in — that's where you write to disk, update progress, or compute a hash. EVENT_FINISHED or EVENT_FAILED ends the request. Memory used is one chunk at a time; the importer can stream a 500 MB file under any memory limit large enough to hold a single chunk.

    + +

    A sliding window of ten concurrent downloads

    + +

    The importer might reference dozens of images. Doing them one at a time would be unnecessarily slow; firing all of them at once would hammer the upstream and risk being rate-limited. The polite move is a fixed-size window: keep ten requests in flight, and as each one finishes, enqueue the next:

    + + + + + +

    The sliding window is a small piece of bookkeeping — a pending queue, an active set, an "enqueue next" callback — wrapped around the same event loop you saw above. Real importers do exactly this for media frontloading. The concurrency option in the Client constructor is the upper bound; the bookkeeping enforces a moving window so you don't enqueue more work than the window holds.

    + + + +

    Resume a partial download

    + +

    Long downloads fail. Sometimes the network drops, sometimes the host runs out of execution time. The importer should be able to resume rather than redownload. HTTP's contract for that is Range: bytes=N-. Sending it to a cooperating server returns a 206 Partial Content response with the missing bytes:

    + + + + + +

    The defensive check matters: not every server respects Range, especially when sitting behind a CDN with caching that doesn't know how to pass the header upstream. If you ask for a partial response and the server hands you a fresh 200 instead, your existing bytes don't match what's coming and you have to start over. That's the recursion in resumable_download() — it's a one-line fallback rather than a separate retry path.

    + +

    Stream a remote ZIP through ZipFilesystem

    + +

    The importer's input is a ZIP — chapter 2 read it from disk. But what if the ZIP lives on a URL? Downloading it whole, opening it with ZipFilesystem, then deleting the file afterwards works, but it asks you to coordinate a temp path the toolkit could manage for you. SeekableRequestReadStream wraps a Request as a seekable byte stream that ZipFilesystem can read directly: bytes are downloaded sequentially as the consumer reads, the class caches them in an internal temp file (cleaned up when you call close_reading()), and seeks back into already-downloaded ranges hit the cache instead of re-fetching:

    + + + + + +

    That's the entire chapter-2 setup with a remote URL substituted for the local file. SeekableRequestReadStream downloads the response body once, lazily, into a temporary file as ZipFilesystem asks for bytes — so reads work the way they would on a local file (including the seeks that ZipFilesystem performs to find the central directory at the end of the archive). The temp file caches what's been seen, so seeking backwards doesn't re-fetch.

    + + + +

    End-to-end: the importer, finally complete

    + +

    The importer now spans four chapters' worth of components. The full shape:

    + +
      +
    1. Open the input ZIP — locally with ZipFilesystem, or remotely with SeekableRequestReadStream.
    2. +
    3. Stage its contents in an InMemoryFilesystem with copy_between_filesystems().
    4. +
    5. For each Markdown file in the stage, run MarkdownConsumer, then clean_post_html() on the produced block markup.
    6. +
    7. For each image URL referenced from the cleaned content, fetch it with HttpClient through a sliding-window concurrency loop and stage the bytes alongside the WXR.
    8. +
    9. Stream the whole thing into a WXR document with WXRWriter, with the cleaned post markup as content and rewritten image references pointing at the local paths under the staged uploads tree.
    10. +
    + +

    The full importer is roughly a hundred lines of PHP. It depends on no extension beyond json and mbstring. It runs in browser-side WebAssembly, on PHP 7.2 through 8.3, and on every shared host that's kept up with PHP releases. That's the toolkit's whole pitch — pure-PHP libraries that handle the work the platform usually outsources to extensions.

    + +

    Recap

    + +

    You can now:

    + +
      +
    • Fetch a URL with Client::fetch() and read the body either whole (consume_all()) or in chunks (pull()/consume()).
    • +
    • Drive the event loop with enqueue() + await_next_event() for progress reporting and per-chunk processing.
    • +
    • Maintain a sliding window of N concurrent requests by tracking active and pending sets.
    • +
    • Resume a partial download with the Range header, and fall back to a full download when the server doesn't honor it.
    • +
    • Mount a remote ZIP through SeekableRequestReadStream so ZipFilesystem can seek over the response — bytes are downloaded lazily into a temp-file cache as they're read.
    • +
    + + + +

    The recap page summarizes what the four chapters built, what's still in the toolkit beyond what we used, and where to look in the reference for the components we didn't visit (Git for snapshots, Merge for sync, HttpServer for OAuth callbacks, Blueprints for site setup).

    + +
    +
    + + + + diff --git a/docs/learn/index.html b/docs/learn/index.html new file mode 100644 index 000000000..c5f07f4fa --- /dev/null +++ b/docs/learn/index.html @@ -0,0 +1,101 @@ + + + + + +Learn — PHP Toolkit + + + + +
    + PHP Toolkit + +
    + +
    + +

    Learn the toolkit by building a content importer

    + +

    Across four chapters and roughly forty-five minutes, you'll build a small WordPress content importer in pure PHP. It reads a folder of Markdown posts, rewrites the HTML inside them, packages everything as a ZIP-backed staging filesystem, and finally emits a WXR file ready to feed to WordPress's importer. Every snippet runs in your browser. You finish with code you can keep.

    + +
    +

    Before you start

    +

    You should be comfortable reading and writing PHP. You don't need to know WordPress internals — we'll explain the WordPress-specific bits as they come up. You don't need anything installed locally; the runnable snippets execute via WordPress Playground in the page.

    +

    If you want to run the same code on your own machine afterwards, every example works under PHP 7.2 or newer with composer require wp-php-toolkit/<component>.

    +
    + +
    +

    The path

    + +
      +
    1. + + Quickstart + Five minutes from zero to a runnable HTML rewrite. The shape of every later chapter, in miniature. + +
    2. +
    3. + + Chapter 1 — Rewriting HTML safely + Add loading="lazy" to images, rewrite relative links, strip script tags. Meet the cursor model that underlies every later chapter. + +
    4. +
    5. + + Chapter 2 — Streaming archives + Read your importer's input from a ZIP without buffering it in memory. Compose a ZIP-backed filesystem with a memory-backed staging filesystem. + +
    6. +
    7. + + Chapter 3 — Markdown to blocks to WXR + Turn each Markdown file into block markup, then assemble those blocks into a WordPress eXtended RSS export. + +
    8. +
    9. + + Chapter 4 — Talking to the network + Frontload the images referenced from the imported posts using HttpClient with progress, redirects, and resumable downloads. + +
    10. +
    11. + + Recap + What you can now do, and where the rest of the toolkit lives. + +
    12. +
    +
    + +
    +

    What you're building

    + +

    By the end of chapter 4, you'll have a small command — call it importer.php — that takes a ZIP file of Markdown posts and produces a WordPress WXR file:

    + +
    $ php importer.php posts.zip > export.xml
    +parsed 12 posts, 8 images
    +rewrote 47 inline links
    +fetched 8 image attachments (2.3 MB)
    +wrote export.xml (94 KB, valid WXR 1.2)
    + +

    It's a real tool — the WordPress importer plugin will accept the output. You'll write it in pieces, one component per chapter, and the canonical example file grows with each chapter. By chapter 4 it's a hundred lines of pure PHP that depends on no extension beyond json and mbstring.

    + +

    The actual content — twelve Markdown posts about cooking, with embedded images and frontmatter — is part of the tutorial's example data and ships pre-loaded into every snippet. You'll see it in chapter 1.

    +
    + +
    + Start the quickstart → +
    + +
    + + + + diff --git a/docs/learn/quickstart.html b/docs/learn/quickstart.html new file mode 100644 index 000000000..9ef1dfdcd --- /dev/null +++ b/docs/learn/quickstart.html @@ -0,0 +1,129 @@ + + + + + +Quickstart — PHP Toolkit + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + +

    Quickstart

    + +

    By the end of this page you will have rewritten an HTML attribute in five lines of PHP, in a runtime that lives inside this browser tab. You'll see the shape every chapter of the tutorial follows: a problem in plain English, a small chunk of code, and a paragraph that points at what to look at.

    + +

    Install

    + +

    You don't need to install anything to follow the tutorial — the snippets on this site run in your browser. If you want to run the same code in your own project later, this is the line you'll copy:

    + +
    composer require wp-php-toolkit/html
    + +

    Each component installs separately; you only pull in what you use. The HTML component depends on nothing except PHP itself.

    + +

    Rewrite an attribute

    + +

    Here's the smallest useful thing the toolkit does. The example feeds a snippet of HTML into WP_HTML_Tag_Processor, finds every <img> tag, and adds loading="lazy" if the author didn't already set loading themselves.

    + +

    Click Run. The first run on this page boots a PHP runtime in WebAssembly and unzips the toolkit into it; later runs reuse the same runtime, so they're instant.

    + + + + + + +

    Look at the output. The first <img> gained loading="lazy". The second one — which had loading="eager" already — was left alone. The whitespace, the <p> tag, the <article> wrapper, every byte we didn't ask the processor to touch came through unchanged. That property is the entire reason this component exists: rewriting HTML byte-for-byte without re-serializing it.

    + + + +

    Why a cursor, not a DOM

    + +

    The traditional PHP move here is DOMDocument::loadHTML. That works, but loading 50 KB of post content into a libxml DOM, mutating it, and serializing it back gives you a string that's nearly the same as the input — different whitespace, normalized attribute quoting, occasionally a self-closing tag where there wasn't one before. For email templates and feed readers that compare strings byte-for-byte, that's a bug.

    + +

    The Tag Processor walks the HTML linearly, records edits as a small list of byte-range replacements, and applies them only when you call get_updated_html(). The HTML you didn't edit comes through bit-identical. The HTML you edited contains exactly your edits, and nothing else.

    + +

    That model — small, linear, byte-honest — is the toolkit's whole sensibility. Every other component that follows uses some version of it.

    + +

    Recap

    + +

    You can now:

    + +
      +
    • Run a PHP Toolkit snippet in the browser without installing anything.
    • +
    • Read an HTML string with WP_HTML_Tag_Processor and walk every tag of a given name.
    • +
    • Add or replace an attribute with set_attribute() and read the modified HTML with get_updated_html().
    • +
    + +

    That's the whole shape of the tutorial. Each chapter takes one component, shows you the smallest useful thing it does, and folds the result into a content importer that grows page by page.

    + + + +

    In chapter 1 you'll meet the canonical importer's first input — a folder of Markdown posts whose embedded HTML needs cleaning before it ever sees a WordPress database. We'll add lazy loading, rewrite relative URLs, and strip event handlers in a single linear pass.

    + +
    +
    + + + + diff --git a/docs/learn/recap.html b/docs/learn/recap.html new file mode 100644 index 000000000..0b5ae055b --- /dev/null +++ b/docs/learn/recap.html @@ -0,0 +1,95 @@ + + + + + +Recap · PHP Toolkit + + + + +
    + PHP Toolkit + +
    + +
    + +

    Recap and where to go next

    + +

    Across four chapters you built a working content importer. It reads a ZIP of Markdown posts, cleans the HTML inside each one, frontloads referenced images over HTTP, and streams a WXR file the WordPress importer plugin will accept. None of it required curl, libzip, libxml2, or DOMDocument; all of it runs on PHP 7.2 through 8.3 and inside a browser via WordPress Playground.

    + +
    +

    What you built

    + + + + + + +
    Chapter 1clean_post_html() using WP_HTML_Tag_Processor: lazy-load images, rewrite URLs, strip scripts, all in one pass.
    Chapter 2Read the input ZIP through ZipFilesystem, stage it in InMemoryFilesystem, defend against zip-slip with ZipDecoder::sanitize_path().
    Chapter 3Convert each post with MarkdownConsumer, audit the output with WP_Block_Parser, stream the WXR with WXRWriter.
    Chapter 4Frontload images with HttpClient through a sliding-window event loop; mount remote archives with SeekableRequestReadStream.
    +
    + +
    +

    What the toolkit does that the tutorial didn't touch

    + +

    The importer used eight components. The toolkit ships eighteen. Here's what's left, with the use case each one shows up in:

    + +
      +
    • Git — snapshot your importer's runs into a pure-PHP Git repository for revision history. Useful for "what changed between last week's import and this week's." Reference →
    • +
    • Merge — three-way diff and merge for content sync. If posts are edited on both the source and the destination side, this is how you reconcile them. Reference →
    • +
    • HttpServer — a tiny local listening port for OAuth callbacks during a CLI workflow, fixture servers for HttpClient tests, or a status page during a long import. Not for production traffic. Reference →
    • +
    • CORSProxy — when you ship the importer as a browser tool, a server-side proxy to fetch URLs that don't send the right CORS headers. Reference →
    • +
    • CLI — POSIX-style argument parser to wrap your importer as importer.php --site-url=… --dry-run. Reference →
    • +
    • Encoding — UTF-8 validation and scrubbing for inputs that may contain mixed encodings. Most importers eventually need it. Reference →
    • +
    • XML — the cursor-based XML processor underneath DataLiberation; reach for it directly when you need to walk export-sized files. Reference →
    • +
    • Blueprints — declarative site setup. Spin up the destination WordPress with the right plugins and options before running the importer against it. Reference →
    • +
    • Polyfill — WordPress-shaped helpers (esc_html, add_filter, __) so toolkit code can run outside WordPress without ifdefs. Reference →
    • +
    • ToolkitCodingStandards — PHPCS sniffs encoding the project's review feedback as enforceable rules. Borrow if your project follows WordPress style. Reference →
    • +
    +
    + +
    +

    Patterns worth keeping

    + +

    Three shapes recurred across the tutorial. Watch for them in your own code:

    + +

    Cursor over a string

    +

    WP_HTML_Tag_Processor walks a string forward, records edits as a side-buffer of byte-range replacements, and emits the modified string only when you call get_updated_html(). The result is byte-honest — bytes you didn't edit come through bit-identical. When you need to make small changes to large markup, that property is gold. The XML component's XMLProcessor applies the same pattern to XML.

    + +

    Pull / consume streams

    +

    ZipFilesystem::open_read_stream(), HttpClient response bodies, InflateReadStream, and the rest all share the same shape: pull(N) reads up to N bytes from the underlying source into an internal buffer and returns how many ended up there; consume(N) reads N bytes from that buffer and advances past them. Memory used is bounded by the chunk size, never by the file size. Once you internalize this loop you can compose any byte source with any byte sink.

    + +

    One interface, multiple backends

    +

    Code that takes a Filesystem rather than a path doesn't care if the filesystem is on disk, in memory, in a SQLite database, or inside a ZIP. That's how the importer's stage works for both production (memory) and debugging (local disk) without a code change. The same pattern shows up in HttpClient (curl vs sockets transport) and ByteStream (file, memory, deflate, and hash streams all implementing the same byte-stream interface).

    +
    + +
    +

    Where to go from here

    + +

    Three honest paths:

    + +
      +
    1. Take the importer further. Add a --dry-run flag with the CLI component. Snapshot each run into a Git repository so you can diff between imports. Wrap it in a CORSProxy-fronted browser tool. Each of those is a one-component addition; the structure you have already accommodates them.
    2. +
    3. Pick a single component and go deep. The reference pages all have refinements past the minimal example — bookmarks and breadcrumbs in HTML, three-way merges in Git, sliding windows and resumable downloads in HttpClient. The depth is there when the project asks for it.
    4. +
    5. Read the source. Each component lives under components/<Name>/. components/HTML/class-wp-html-tag-processor.php is the same code WordPress core ships in wp-includes/html-api/; components/Zip/class-zipdecoder.php is a clean implementation of the parts of the ZIP spec that the toolkit actually uses. The code is written to be read.
    6. +
    +
    + +
    + Browse all 18 components → + Back to landing + GitHub +
    + +
    + + + + diff --git a/docs/reference/blockparser.html b/docs/reference/blockparser.html new file mode 100644 index 000000000..d58906482 --- /dev/null +++ b/docs/reference/blockparser.html @@ -0,0 +1,330 @@ + + + + + +BlockParser — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    BlockParser

    + +

    WordPress core's block parser, packaged as a standalone library. Turn block markup into a structured tree, lint posts for common authoring mistakes, and audit block usage — all without booting WordPress.

    + +
    composer require wp-php-toolkit/blockparser
    + + + +

    Block markup is not plain HTML. A post can contain HTML comments that identify blocks, JSON attributes inside those comments, freeform HTML between blocks, and nested blocks whose rendered HTML is interleaved with parent markup.

    This component packages WordPress core's block parser so importers, linters, migration tools, and static analyzers can understand block content without loading WordPress. It deliberately mirrors core behavior — same array shape, same null blocks for freeform HTML, same core block names such as core/paragraph — so code written against this parser keeps working when run inside WordPress, and vice versa.

    Reach for it when you need answers about the block tree: which blocks a post uses, which attributes they carry, where nested blocks appear, or whether content violates a rule your project cares about.

    + +

    What you get back

    + +

    WP_Block_Parser::parse() returns an array of blocks. Each block is an associative array with five keys: blockName, attrs, innerBlocks, innerHTML, and innerContent.

    innerHTML is the HTML inside the block with inner blocks stripped out. innerContent is the interleaved version: an array of HTML strings with null placeholders marking where each inner block belongs.

    Most code starts by checking blockName, then reading attrs or innerHTML. When a post has container blocks such as Group, Columns, or Navigation, look inside innerBlocks too.

    + +

    A minimal example

    + +

    The simplest possible use. Pass a string, get back a tree.

    + + + + + + +

    Refinement: count every block type in a post

    + +

    A common audit task: "How many Paragraph, Image, and Gallery blocks does this post use?" A small queue keeps the example readable while still visiting nested blocks.

    + + + + + + +

    Refinement: check whether a post uses a block

    + +

    Useful for templates, audits, and migrations: answer one yes/no question without caring where the block appears in the tree.

    + + + + + + +

    Refinement: lint headings for hierarchy mistakes

    + +

    "Don't skip from H2 to H4" is a real accessibility rule. The helper below keeps headings in document order, including headings nested inside Group, Column, and Cover blocks.

    + + + + + + +

    Refinement: find all instances of a custom block

    + +

    When auditing an export for a block your plugin owns, collect every match and print the fields a human cares about.

    + + + + + + +

    Refinement: detect blocks with stale embed URLs

    + +

    A real-world content audit: find every core/embed whose URL points at a domain you have retired.

    + + + + + + +

    Pitfalls

    + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/blueprints.html b/docs/reference/blueprints.html new file mode 100644 index 000000000..2c69e22bb --- /dev/null +++ b/docs/reference/blueprints.html @@ -0,0 +1,228 @@ + + + + + +Blueprints — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Blueprints

    + +

    Declarative WordPress site provisioning. Write a JSON description of plugins, options, and content; let the runner execute it.

    + +
    composer require wp-php-toolkit/blueprints
    + +

    A WordPress environment is more than a database dump. It can require a specific core version, plugins, themes, site options, uploaded files, content, and setup steps. Rebuilding that by hand makes demos, tests, bug reports, workshops, and CI fixtures drift over time.

    The Blueprints component treats site setup as data. A blueprint JSON document describes the desired steps, and the runner applies them to either a new WordPress install or an existing one. The validator exists because user-authored JSON needs clear, path-specific errors rather than generic schema failures.

    RunnerConfiguration separates the web root from the WordPress core directory, since real hosts often put them in different places. Both paths are explicit on the runner, never inferred.

    Blueprints can create a new WordPress install (download core, set up the database, apply steps) or apply to an existing site. Creating a fresh install needs filesystem access this in-browser runtime doesn't have, so the runnable snippets focus on APPLY_TO_EXISTING_SITE.

    + +

    A minimal example

    + +

    RunnerConfiguration is a fluent builder. The minimum: target site root, target site URL, execution mode.

    + + + + + + +

    Refinement: generate blueprint JSON from PHP

    + +

    CI jobs and tests stay clearer when PHP builds the blueprint from data instead of hand-writing JSON. Keep the structure plain: version, then a list of step arrays.

    + + + + + + +

    Refinement: validate before running

    + +

    The schema validator returns a human-readable ValidationError instead of a generic "does not match schema" failure. Use it before handing user-authored JSON to a runner.

    + + + + + + +

    The Blueprint JSON shape

    + +

    A blueprint is a JSON document with a version field and a steps array. Each step has a "step" discriminator and step-specific fields. This is the same shape used by WordPress Playground.

    + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/bytestream.html b/docs/reference/bytestream.html new file mode 100644 index 000000000..eed37bf85 --- /dev/null +++ b/docs/reference/bytestream.html @@ -0,0 +1,244 @@ + + + + + +ByteStream — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    ByteStream

    + +

    Composable streaming primitives for reading, writing, transforming, hashing, and compressing byte data. Pull/peek/consume semantics let parsers backtrack without copying, and deflate, inflate, and checksum filters snap together like Lego.

    + +
    composer require wp-php-toolkit/bytestream
    + +

    PHP's native streams are powerful but inconsistent. fread on a socket may return short reads with no warning; stream_filter_append is awkward to compose; gzip helpers and file handles expose different APIs. The ByteStream component normalizes these behind one small interface — pull / peek / consume — so a parser, a hash function, and a deflate filter all see the same shape.

    The split between pull (buffer up to N bytes) and consume (advance past N bytes) is the secret. Parsers can peek ahead to detect a record boundary and decide whether to consume, without copying or allocating.

    + +

    A minimal example

    + +

    The canonical loop. pull(N) reads up to N bytes from the underlying source into an internal buffer and returns how many ended up there; consume(N) reads N bytes from that buffer and advances past them. The buffer never grows beyond the chunk size you ask for.

    + + + + + + +

    Refinement: MemoryPipe as a write-then-read buffer

    + +

    MemoryPipe is bidirectional: you append_bytes() as a writer and pull/consume as a reader. It is the easiest way to wire one component's output into another's input.

    + + + + + + +

    Refinement: compress on the way in, decompress on the way out

    + +

    Wrap a stream in DeflateReadStream to get compressed bytes out; wrap it in InflateReadStream to get decompressed bytes out. Both are full ByteReadStream implementations, so they nest into anything else that takes a stream.

    + + + + + + +

    Refinement: line-by-line reads from a chunked source

    + +

    Reading text by line means handling chunk boundaries that fall mid-line. Keep the trailing partial line and prepend it to the next pull. The rest of the loop pretends the data was always whole.

    + + + + + + +

    Refinement: limit a stream to a fixed window

    + +

    LimitedByteReadStream exposes only the next N bytes of an underlying stream as if those were the entire stream. This is how the ZIP decoder hands you the body of one entry without letting you read into the next.

    + + + + + + +

    Pitfalls

    + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/cli.html b/docs/reference/cli.html new file mode 100644 index 000000000..a42c80a04 --- /dev/null +++ b/docs/reference/cli.html @@ -0,0 +1,273 @@ + + + + + +CLI — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    CLI

    + +

    POSIX-style argument parser. Long options, short bundles, inline values, positional args — one static call.

    + +
    composer require wp-php-toolkit/cli
    + +

    Real CLI tools in PHP usually mean either pulling in symfony/console (and the transitive dependencies that come with it) or hand-rolling argv parsing that breaks the first time someone writes -vvv or --port=8080. The toolkit's CLI class is one static method, no dependencies, and handles the POSIX shapes you actually see.

    + +

    A minimal example

    + +

    The smallest useful invocation: one boolean flag, one positional. Each option is a four-tuple of [ short, has_value, default, description ].

    + + + + + + +

    Refinement: mix values, flags, and bundles

    + +

    The parser accepts --port 8080, --port=8080, -p 8080, and -p=8080. It also expands bundled boolean shorts such as -afv.

    + + + + + + +

    Refinement: validate required options

    + +

    The parser fills in defaults but never enforces "required". Check for null after parsing yourself — that leaves you in full control of the error message.

    + + + + + + +

    Refinement: generate --help from definitions

    + +

    Because each option carries its own description, you can render help text by walking the same definitions you parse with. No second source of truth.

    + + + + + + +

    Refinement: git-style subcommands

    + +

    To build a tool with subcommands like mytool deploy, peel the first positional off argv, dispatch, and parse the rest with a per-command option set.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/coding-standards.html b/docs/reference/coding-standards.html new file mode 100644 index 000000000..228ac0bdb --- /dev/null +++ b/docs/reference/coding-standards.html @@ -0,0 +1,90 @@ + + + + + +ToolkitCodingStandards — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    ToolkitCodingStandards

    + +

    PHP_CodeSniffer sniffs used by this project: enforce Yoda comparisons and ban the short ternary where it hides falsy-value bugs.

    + +
    composer require wp-php-toolkit/toolkit-coding-standards
    + +

    This package is not a general-purpose style guide. It holds project-specific PHP_CodeSniffer rules for review comments the toolkit wants automated: comparisons should follow the WordPress Yoda style, and short ternaries should not hide whether a fallback is meant for null only or for all falsy values.

    Use it in this monorepo, or in a project that intentionally wants the same review tradeoffs. If your project does not follow WordPress-style comparisons, the Yoda sniff is probably the wrong rule for you.

    + +

    Reference the standard from your phpcs.xml

    + +

    The component is a PHPCS ruleset, so the useful examples are configuration and before/after code rather than runtime snippets. Activate both sniffs at once by referencing WordPressToolkitCodingStandards:

    Then run phpcs and phpcbf the usual way:

    + +

    EnforceYodaComparison: catches accidental assignment

    + +

    Yoda comparisons (true === $x) make typo-induced assignments easier to catch and match the WordPress style used throughout the toolkit:

    The sniff covers ===, !==, ==, and !=, and stays quiet when both sides are dynamic.

    + +

    Why ban the short ternary

    + +

    Developers confuse the short ternary ($a ?: $b) with the null-coalescing operator ($a ?? $b). They differ on falsy-but-not-null values: 0 ?: 'fallback' returns 'fallback', but 0 ?? 'fallback' returns 0. The sniff bans ?: entirely so reviewers don't have to relitigate this on every PR.

    + +

    Review-friendly replacements

    + +

    When the fallback should apply only to null, use ??. When the fallback should apply to every falsy value, write the full ternary so the intent is visible in review.

    + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/corsproxy.html b/docs/reference/corsproxy.html new file mode 100644 index 000000000..517404723 --- /dev/null +++ b/docs/reference/corsproxy.html @@ -0,0 +1,160 @@ + + + + + +CORSProxy — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    CORSProxy

    + +

    A small PHP CORS proxy intended for browser-side code that needs to reach servers without CORS headers.

    + +
    composer require wp-php-toolkit/corsproxy
    + +

    A Playground-style browser tool reads https://api.github.com/repos/WordPress/php-toolkit, a plugin ZIP from downloads.wordpress.org, or a raw fixture from GitHub. The browser blocks the response when the upstream server does not send the required CORS headers, even though PHP can fetch the same public URL server-side.

    The CORSProxy component is that server-side bridge. It accepts a target URL, fetches it from PHP, and returns a browser-readable response. Because an open proxy is a security and abuse risk, real deployments should add host allowlists, rate limits, header controls, and private-network protections appropriate to their environment.

    + +

    Run the proxy locally

    + +

    A minimal example

    + +

    Drop a cors-proxy-config.php next to cors-proxy.php. If that file defines a playground_cors_proxy_maybe_rate_limit() function, the proxy calls it before forwarding any request — your one chance to reject early. Without the file, the proxy applies its default rate limiter, which is fine for development but should be replaced for any deployment that gets real traffic.

    This example uses a per-IP token bucket stored on disk. Replace with Redis or memcached for multi-host deployments.

    + + + + + +

    Refinement: allowlist upstream hosts

    + +

    Out of the box the proxy will fetch any public URL. Most real deployments want a fixed list of upstreams — GitHub, Packagist, wp.org. Both the rate-limit logic and the allowlist live in the same hook, since cors-proxy.php only calls playground_cors_proxy_maybe_rate_limit() once. The example below shows just the allowlist concern; in practice you stack both in one function inside cors-proxy-config.php.

    + + + + + +

    Browser-side fetch through the proxy

    + +

    Once deployed, the client side is just fetch() with the proxy URL. Drop this into any HTML page.

    + +

    Deploy behind nginx

    + +

    The proxy is a single PHP script — any SAPI works. nginx + php-fpm is a common production setup. PATH_INFO is what the proxy reads to learn the target URL.

    + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/dataliberation.html b/docs/reference/dataliberation.html new file mode 100644 index 000000000..70c0ee2b6 --- /dev/null +++ b/docs/reference/dataliberation.html @@ -0,0 +1,316 @@ + + + + + +DataLiberation — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    DataLiberation

    + +

    Streaming WordPress import/export. WXR, SQL, block markup — without loading whole datasets into memory.

    + +
    composer require wp-php-toolkit/data-liberation
    + +

    WordPress content should be portable, but real migrations cross several formats. A site export might arrive as WXR, a Markdown folder, or entities from another CMS. URLs can hide in block attributes, HTML, CSS, feeds, GUIDs, and post meta. Importers must also resume after a failed media download or upload.

    The DataLiberation component streams WordPress-shaped data through readers, transformers, and writers. It models posts, terms, comments, attachments, and metadata as ImportEntity objects, then lets a pipeline rewrite each entity without loading the full export into memory.

    The API reflects specific migration bugs: relative URLs in known block attributes, URLs inside inline CSS, self-closing block comments that must keep their shape, and origin-only URLs whose trailing slash style should not change during a rewrite.

    Reach for it when the job combines formats: build WXR from another CMS, rewrite a staging export for production, frontload remote assets, or compose Markdown, XML, HTML, CSS, and URL rewriting into one pipeline.

    + +

    A minimal example

    + +

    Stream a single post into a WXR document via WXRWriter. The writer holds no buffer beyond what is needed to close currently-open tags, so memory stays flat regardless of input size.

    + + + + + + +

    Refinement: build a WXR programmatically from any source

    + +

    The writer doesn't care where entities come from. Loop over rows from a CMS, a CSV, or a Notion API dump and emit posts plus their meta and comments.

    + + + + + + +

    Refinement: read entities from a WXR file with constant memory

    + +

    WXREntityReader emits one entity at a time. A 10 GB WXR uses the same memory as a 10 KB one.

    + + + + + + +

    Refinement: streaming transform: rewrite URLs while copying WXR

    + +

    Wire reader to writer to rewrite a WXR file on the fly. This pattern is how you migrate a staging export to production: swap staging.example.com for example.com without ever loading the file into memory.

    + + + + + + +

    Refinement: render Markdown into a WXR import in one pipeline

    + +

    Compose MarkdownConsumer with WXRWriter to publish a folder of Markdown directly as a WordPress import file.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/encoding.html b/docs/reference/encoding.html new file mode 100644 index 000000000..aaea5042e --- /dev/null +++ b/docs/reference/encoding.html @@ -0,0 +1,233 @@ + + + + + +Encoding — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Encoding

    + +

    UTF-8 validation and scrubbing with a pure-PHP fallback when mbstring is unavailable. Detects malformed bytes and replaces them per the Unicode maximal-subpart algorithm.

    + +
    composer require wp-php-toolkit/encoding
    + +

    Every parser in this toolkit eventually has to decide what to do with text bytes. XML rejects malformed UTF-8. JSON and databases can fail late. CSS, HTML, WXR, and Blueprint validation all need consistent answers about whether a string is well-formed Unicode.

    The Encoding component provides the small UTF-8 primitives the rest of the toolkit can share: validate bytes, scrub invalid sequences, scan code points, and detect Unicode noncharacters. When mbstring is available it can delegate to it; when it is not, the component uses its own byte scanner so behavior stays available in restricted PHP environments.

    Historically, this became the common foundation for Blueprint validation and CSS/XML processing, replacing ad hoc Unicode helpers with the WordPress core UTF-8 routines used here.

    + +

    A minimal example

    + +

    wp_is_valid_utf8() rejects overlong sequences, surrogate halves, and stray ISO-8859-1 bytes. Use it as a guard in front of any code path that assumes UTF-8 (database, JSON, XML).

    + + + + + + +

    Refinement: scrubbing invalid bytes with U+FFFD

    + +

    Replace each ill-formed sequence with the Unicode replacement character. Useful right before serializing to XML, JSON, or sending to an LLM that will choke on broken bytes.

    + + + + + + +

    Refinement: detecting noncharacters MySQL/utf8mb4 will reject

    + +

    Code points like U+FFFE, U+FFFF, and the U+FDD0–U+FDEF block are valid Unicode but forbidden in XML and rejected by some databases. Check before inserting user-submitted content into a strict utf8mb4 column.

    + + + + + + +

    Refinement: three-way pipeline: validate, scrub, then check noncharacters

    + +

    Real-world inputs are messy: an old WXR export, a CSV with mixed encodings, a paste from Word. Combining validation, scrubbing, and a noncharacter check covers the three classes of breakage that bite later.

    + + + + + + +

    Refinement: salvaging a legacy ISO-8859-1 column inside a UTF-8 corpus

    + +

    Old WordPress databases sometimes mix encodings: most rows are UTF-8 but a few were stored as latin-1. Detect the bad rows with wp_is_valid_utf8() and only re-encode those.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/filesystem.html b/docs/reference/filesystem.html new file mode 100644 index 000000000..22ae72421 --- /dev/null +++ b/docs/reference/filesystem.html @@ -0,0 +1,291 @@ + + + + + +Filesystem — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Filesystem

    + +

    One Filesystem interface across local disk, in-memory trees, SQLite databases, and ZIP archives. Forward-slash paths everywhere — even on Windows — so the same code runs in tests, in production, and inside read-only ZIPs.

    + +
    composer require wp-php-toolkit/filesystem
    + +

    Code that touches the filesystem is hard to test, hard to port to Windows, and impossible to point at non-disk storage without rewriting it. Swap LocalFilesystem for InMemoryFilesystem in tests and your suite stops touching /tmp; swap it for SQLiteFilesystem and your "files" become rows in a portable database; swap it for ZipFilesystem and you can read inside an archive with the same calls.

    Every backend uses forward slashes regardless of host OS. No DIRECTORY_SEPARATOR juggling, no Windows-only test failures, no surprises when a path moves between backends.

    + +

    A minimal example

    + +

    The fastest backend. No disk I/O, no cleanup, no test-isolation problems.

    + + + + + + +

    Refinement: test code without touching disk

    + +

    Code that takes a Filesystem parameter, instead of calling file_get_contents() directly, can be tested against an InMemoryFilesystem. The test sets up files in memory, exercises the function, and asserts on what got written — no temp directories, no cleanup.

    + + + + + + +

    Refinement: local disk with a chrooted root

    + +

    LocalFilesystem::create($root) is implicitly chrooted: every path resolves relative to $root and a ../ cannot escape. Reach for it when a request path or CLI argument names a file inside one project directory.

    + + + + + + +

    Refinement: SQLite as a portable file store

    + +

    The whole tree lives in one SQLite database file. Use it for self-contained scratch storage that survives process boundaries without leaving loose files behind.

    + + + + + + +

    Refinement: copy a tree across backends

    + +

    The killer composability move: copy_between_filesystems() streams files chunk-by-chunk from any source to any target. Pull a ZIP into SQLite, snapshot SQLite to disk, mirror disk into RAM — all the same call.

    + + + + + + +

    Refinement: atomic write via tempfile rename

    + +

    Write to a sibling tempfile, then rename — that's how you avoid leaving a half-written file on crash. rename() is atomic within a single filesystem.

    + + + + + + +

    Refinement: path helpers that behave the same on Windows

    + +

    Unix path semantics apply on every host OS. This matters for abstract paths such as a SQLite key or a ZIP entry name because those paths do not live on a real drive.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/git.html b/docs/reference/git.html new file mode 100644 index 000000000..4508e7b05 --- /dev/null +++ b/docs/reference/git.html @@ -0,0 +1,304 @@ + + + + + +Git — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Git

    + +

    A pure-PHP Git client and server. Commits, branches, diffs, HTTP push/pull — all without shelling out to git.

    + +
    composer require wp-php-toolkit/git
    + +

    Git is a useful storage model even when a server cannot run the git binary: snapshots, branches, object-addressed files, diffs, merges, and sync over HTTP. That matters for WordPress tools that want revision history for generated files, content snapshots, site state, or collaborative edits in constrained runtimes.

    The Git component implements the core repository operations in PHP and stores objects through the toolkit Filesystem interface. That means the same repository can live on disk, in memory, or in another backend, and higher-level code can commit files without knowing where objects are stored.

    The docs start with simple commits because that mental model scales: a repository is just objects plus refs. From there, branches, history walking, root commits, and merges become details you can reason about instead of magic shell behavior.

    Choose it for tests, browser-like sandboxes, hosted WordPress environments, and applications that need Git behavior through PHP APIs instead of shell commands.

    + +

    A minimal example

    + +

    The simplest possible repository: an InMemoryFilesystem as object storage and one commit() call. Reach for this in tests, in WP-CLI snapshots, or any place you want versioning without touching disk.

    + + + + + + +

    Refinement: walk the commit history

    + +

    Follow the parent chain from HEAD backwards. This is the building block for a WP-CLI "post revisions" log or a "what changed since release X" report.

    + + + + + + +

    Refinement: treat a repository like a filesystem

    + +

    GitFilesystem wraps a repository in this toolkit's Filesystem interface. With the default options, each put_contents() records a new commit.

    + + + + + + +

    Refinement: branch, edit, and switch back

    + +

    Create a feature branch off the current commit, change files, flip HEAD back. Useful for experimental edits in collaborative tools.

    + + + + + + +

    Refinement: three-way merge two branches

    + +

    The classic Git workflow: branch off, edit on each side, merge. $repo->merge() finds the common ancestor, three-way-merges every file, and creates a merge commit.

    + + + + + + +

    Refinement: snapshot WordPress options into a repo

    + +

    Serialize a chunk of WP state (options, post meta, a theme config) on every save and commit it. You get free history, diffs between snapshots, and a "rollback to last week" button.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/html.html b/docs/reference/html.html new file mode 100644 index 000000000..77c86e8c3 --- /dev/null +++ b/docs/reference/html.html @@ -0,0 +1,284 @@ + + + + + +HTML — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + +

    HTML

    + +

    A browser-grade HTML5 parser and tag rewriter for PHP, ported from WordPress core. Walk HTML as a cursor, edit attributes byte-for-byte, query by ancestry — without libxml2, without re-serializing, and without losing the bytes you didn't ask to change.

    + +
    composer require wp-php-toolkit/html
    + + + +

    WordPress filters HTML constantly: post content, comments, excerpts, feeds, block markup, imported documents. Those fragments often omit <html> and <body>, close tags implicitly, and contain markup browsers handle gracefully but DOMDocument chokes on. The HTML component matches what browsers do — without the extension.

    + +

    The component gives you two processors. WP_HTML_Tag_Processor is a forward-only cursor over tags and tokens; use it for attribute rewriting at scale. WP_HTML_Processor layers HTML5 tree construction on top so you can query by ancestry (breadcrumbs), serialize the parsed document, and trust that <p>one<p>two parses as two paragraphs the way a browser sees it.

    + +

    A minimal example

    + +

    Add loading="lazy" to every image in a fragment. The smallest demonstration of every property the component is built around: walk by tag name, conditional edits, byte-honest output.

    + + + + + + +

    The Tag Processor is allocation-light by design: edits are recorded as byte-range replacements and applied lazily when you call get_updated_html(). The first time you read the modified HTML the cost is proportional to the number of edits, not to the size of the document.

    + +

    Refinement: rewrite relative URLs

    + +

    Walk every <a> tag, classify the href as already-absolute or not, and prepend a base URL when needed. Same processor, different filter:

    + + + + + + +

    Refinement: strip script tags and on* handlers

    + +

    The cursor exposes one trick the others don't: get_attribute_names_with_prefix(). Combined with set_modifiable_text('') for raw-text elements, you can neutralize the most common pasted-HTML XSS vectors in one walk.

    + + +'; + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag() ) { + if ( 'SCRIPT' === $tags->get_tag() && ! $tags->is_tag_closer() ) { + $tags->set_modifiable_text( '' ); + } + foreach ( $tags->get_attribute_names_with_prefix( 'on' ) as $attr ) { + $tags->remove_attribute( $attr ); + } +} +echo $tags->get_updated_html(); + + + + + +

    Refinement: stamp a CSP nonce

    + +

    For Content Security Policy in nonce- mode, every inline <script> and <style> needs a matching nonce attribute. Tag-by-tag is exactly the right granularity:

    + + +'; + +$tags = new WP_HTML_Tag_Processor( $html ); +while ( $tags->next_tag() ) { + $tag = $tags->get_tag(); + if ( ( 'SCRIPT' === $tag || 'STYLE' === $tag ) && ! $tags->is_tag_closer() ) { + $tags->set_attribute( 'nonce', $nonce ); + } +} +echo "nonce: $nonce\n" . $tags->get_updated_html(); + + + +

    Refinement: query by ancestry with WP_HTML_Processor

    + +

    The Tag Processor walks tags as a flat sequence — fast and lean, but blind to context. When you need "every <img> directly inside a <figure>" or "the first <h1> outside any <blockquote>," reach for WP_HTML_Processor. It implements HTML5 tree construction, exposes get_breadcrumbs(), and accepts a breadcrumbs filter on next_tag():

    + + + + + + +

    Notice that the <img> inside the <p> didn't get the class — its breadcrumbs are (ARTICLE, P, IMG), not (FIGURE, IMG). The full processor is the right tool whenever the question contains the word "inside."

    + +

    Refinement: bookmarks for backward edits

    + +

    The Tag Processor is forward-only, but bookmarks let you save a position, scan ahead, and seek back to rewrite an earlier tag based on what you found later:

    + + + + + + +

    Bookmarks are explicitly released because each one pins a small amount of state in the processor. In a long document with many bookmarks, releasing them as soon as you're done keeps memory flat.

    + +

    When to use which

    + + + + + + + +
    UseFor
    WP_HTML_Tag_ProcessorAttribute rewriting, sanitization, finding tags by name. Forward-only walks. Anything where speed and byte-honesty matter more than context.
    WP_HTML_Processor::create_fragment()Queries by ancestry (breadcrumbs), heading outline extraction, anything that needs to know "is this tag inside that one."
    WP_HTML_Decoder::decode_text_node()Turning entity-encoded text (AT&amp;T) back into raw text correctly. Implements the HTML5 entity algorithm — don't roll your own.
    WP_HTML_Decoder::attribute_starts_with()Safe URL-prefix checks that respect encoded characters (java&#x09;script:). The classic strpos approach misses these.
    + +

    Pitfalls

    + + + + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/httpclient.html b/docs/reference/httpclient.html new file mode 100644 index 000000000..782c4b0f9 --- /dev/null +++ b/docs/reference/httpclient.html @@ -0,0 +1,622 @@ + + + + + +HttpClient — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    HttpClient

    + +

    Async HTTP client that does not require curl. It falls back to pure PHP sockets when curl is missing, and supports concurrent requests and streaming responses.

    + +
    composer require wp-php-toolkit/http-client
    + +

    A plugin installer starts with one request to download plugin.zip. A migration then adds progress reporting, a ten-request media window, resumable downloads, and a remote ZIP reader that feeds ZipFilesystem directly. Those workflows need the same request API from the first GET to the final streamed archive.

    The HttpClient component gives the toolkit a small request/response model, middleware for redirects and caching, concurrent fetches, and response bodies exposed as byte streams. It runs through curl when PHP provides curl and through pure PHP sockets when it does not. Callers keep the same code path.

    Use it to fetch plugin metadata, submit import callbacks, mirror a media library, read a WXR export, or pipe a remote archive into Zip and Filesystem code.

    + +

    A minimal example

    + +

    The smallest flow has three steps: create a request, wait until headers arrive, then consume the body stream. This is intentionally close to the Fetch API shape, but the body is a toolkit byte stream instead of a buffered string.

    + + + + + +

    Refinement: POST to a URL

    + +

    Uploads use the same shape. The only difference is that the request declares a method, request headers, and an upload body stream. Here the body is form-encoded text wrapped in MemoryPipe; a file upload could provide a file-backed read stream instead.

    + + + + + +

    Refinement: build a JSON request object

    + +

    A Request is just data until a client enqueues it. That makes it easy to test request construction without network access. The constructor normalizes headers, calculates content-length when the body stream has a known length, and moves URL credentials into an Authorization header.

    + + + + + + +

    Refinement: parse response headers

    + +

    Most applications receive Response objects from await_response(). Transports, middleware, and tests sometimes need the lower-level parser: Response::from_http_headers() turns raw HTTP header bytes into normalized status and case-insensitive headers.

    + + + + + + +

    Pick the right reading style

    + +

    There are three common ways to consume a response. Start simple, then move down the table only when the workflow demands it.

    + +

    Refinement: choose a transport

    + +

    The transport is the I/O backend. It should not change your request, response, redirect, cache, or stream code; it only changes how bytes move across the network.

    concurrency, timeout_ms, cache_dir, redirects, and response streaming sit above the transport, so the examples later on work with either backend.

    + + + + + +

    Refinement: follow redirects and inspect the final request

    + +

    Redirects are middleware, not transport behavior. The client follows up to five redirects by default. The original Request keeps a chain to the final request, so importers can log where a source URL actually landed.

    + + + + + +

    Refinement: cache repeatable GET responses

    + +

    Pass cache_dir to add disk caching for cacheable GET and HEAD responses. Fresh cached responses replay the same header/body events as a network response, so crawlers and importers do not need a separate cache code path. Non-GET requests invalidate matching cache entries instead of being cached.

    + + + + + +

    Refinement: handle failures without losing the queue

    + +

    Failures arrive as events. That lets a crawler, importer, package installer, or media frontloader log one bad URL and keep processing the rest of the queue. Treat failure handling as part of the event loop, not as one global try/catch around the whole batch.

    + + + + + +

    Refinement: monitor download progress

    + +

    When you care about progress, use the event loop directly. Count bytes from each EVENT_BODY_CHUNK_AVAILABLE event and compare them with Content-Length when the server provides one.

    + + + + + +

    Refinement: keep a sliding window of 10 requests

    + +

    For large queues, do not enqueue everything at once. Keep at most ten active requests, enqueue another as each one finishes, and let the client multiplex only that window.

    + + + + + +

    Refinement: resume a partial download

    + +

    Resuming is an HTTP contract between you and the server. Save what you already have, send a Range request for the remaining bytes, and append only if the server returns 206 Partial Content.

    + + + + + +

    Refinement: stream-unzip a remote archive

    + +

    Mount the remote archive with ZipFilesystem, then copy it into any writable filesystem. SeekableRequestReadStream caches received bytes to a temporary file so ZipFilesystem can read the central directory and seek to entries without first writing the ZIP yourself.

    + + + + + +

    Refinement: parallel fan-out: fetch many URLs at once

    + +

    Enqueue a batch of requests and react to events as they fire. The client multiplexes them — total wall time is roughly the slowest request, not the sum.

    + + + + + +

    Refinement: stream a download to disk without OOM

    + +

    Process the body chunk-by-chunk via the event loop. Memory stays flat regardless of file size.

    + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/httpserver.html b/docs/reference/httpserver.html new file mode 100644 index 000000000..66c499ece --- /dev/null +++ b/docs/reference/httpserver.html @@ -0,0 +1,185 @@ + + + + + +HttpServer — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    HttpServer

    + +

    A minimal blocking TCP HTTP server in pure PHP. For CLI tools and tests, not for production traffic.

    + +
    composer require wp-php-toolkit/http-server
    + +

    Sometimes a PHP tool needs a tiny local HTTP surface: a test fixture server, a webhook receiver during development, a CLI tool with a browser UI, or a demo endpoint for another component. Pulling in a production web framework would obscure the example and add dependencies the toolkit avoids.

    The HttpServer component is intentionally small: a blocking TCP server, incoming request objects, and response writers. It is useful for local tools and tests. It is not a replacement for nginx, Apache, php-fpm, RoadRunner, Swoole, or a production application server.

    + +

    A minimal example

    + + + + + +

    Refinement: a tiny JSON router

    + +

    Build a CLI tool with a web UI by switching on the parsed path and method.

    + + + + + +

    Refinement: buffered response with auto Content-Length

    + +

    Use BufferingResponseWriter when you want the framework to compute Content-Length for you, or when the runtime is CGI-shaped and expects the full body up front. This one runs anywhere — no socket required.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html new file mode 100644 index 000000000..5330e5107 --- /dev/null +++ b/docs/reference/index.html @@ -0,0 +1,74 @@ + + + + + +Reference — PHP Toolkit + + + + +
    + PHP Toolkit + +
    + +
    + +

    Reference

    + +

    One concept guide per component. Each page assumes you know what HTML is, what a ZIP file is, what a stream is — and explains how this particular component models that thing in pure PHP. If a concept appears for the first time in the tutorial, the reference page links back.

    + +
    +

    Content and migration

    + +
    + +
    +

    Streams and storage

    + +
    + +
    +

    Networked tools

    + +
    + +
    +

    WordPress runtime support

    + +
    + +
    + + + + diff --git a/docs/reference/markdown.html b/docs/reference/markdown.html new file mode 100644 index 000000000..227aebbe1 --- /dev/null +++ b/docs/reference/markdown.html @@ -0,0 +1,263 @@ + + + + + +Markdown — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Markdown

    + +

    Bidirectional converter between Markdown and WordPress block markup. Useful for moving content between Markdown files and WordPress while preserving the structures both formats can express.

    + +
    composer require wp-php-toolkit/markdown
    + + + +

    Many publishing workflows start in Markdown: documentation sites, static-site generators, Git-backed editorial workflows, Obsidian vaults, and developer notes. WordPress stores editor content as block markup. Moving between those worlds by string replacement loses metadata and quickly breaks on lists, tables, code blocks, and frontmatter.

    The Markdown component provides a structured bridge. MarkdownConsumer turns Markdown plus frontmatter into block markup and metadata; MarkdownProducer turns supported block markup back into Markdown. The conversion is meant for practical content workflows, not byte-identical round-tripping of every custom block attribute.

    + +

    A minimal example

    + +

    Feed Markdown into MarkdownConsumer, get block markup back. The result is a BlocksWithMetadata object that holds both the rendered blocks and any frontmatter parsed from the document.

    + + + + + + +

    Refinement: round-trip: blocks back to Markdown

    + +

    Pair MarkdownProducer with MarkdownConsumer to convert in either direction. Round-tripping is lossy for block attributes that have no Markdown representation (custom classes, alignment), so do not expect byte-perfect equality.

    + + + + + + +

    Refinement: reading YAML frontmatter as post meta

    + +

    Frontmatter values come back as arrays, so a single key can hold multiple values. Use get_meta_value() when you only want the first scalar.

    + + + + + + +

    Refinement: migrating an Obsidian or Hugo folder of Markdown

    + +

    Walk a directory of .md files (Obsidian vault, Hugo content/, Jekyll _posts) and emit one block-markup record per file.

    + + + + + + +

    Refinement: counting blocks produced by a Markdown document

    + +

    After conversion, the output is plain WordPress block markup, so parse_blocks() works on it directly. This is the standard way to inspect what the converter emitted before saving it to the database.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/merge.html b/docs/reference/merge.html new file mode 100644 index 000000000..7ced9da74 --- /dev/null +++ b/docs/reference/merge.html @@ -0,0 +1,283 @@ + + + + + +Merge — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Merge

    + +

    Three-way merge and diff, built from a pluggable differ, a pluggable merger, and an optional validator.

    + +
    composer require wp-php-toolkit/merge
    + +

    Content synchronization needs more than "last write wins." A Markdown file changes in Git while the same post changes in WordPress. A generated config changes through both a CLI tool and a UI. In those cases you need a common ancestor, two edited versions, and a way to explain conflicts to a human.

    The Merge component provides the diff and three-way merge primitives used by those workflows. The default examples are line-oriented because that is the most familiar shape, but the strategy is intentionally pluggable: choose the differ, choose the merger, and optionally validate the merged result before accepting it.

    Use the merge result to auto-accept independent edits and to show structured conflicts when a person must decide.

    + +

    A minimal example

    + +

    Feed two strings to LineDiffer and inspect the operations. Every get_changes() entry is a [op, text] pair.

    + + + + + + +

    Refinement: render a unified patch

    + +

    format_as_git_patch() produces output that mirrors git diff, including hunk headers — handy for emails, CI annotations, or a "what changed?" panel.

    + + + + + + +

    Refinement: three-way merge with no conflicts

    + +

    The classic case: each branch changes a different region. Pass the common ancestor plus both edits to MergeStrategy::merge() and read the merged result.

    + + + + + + +

    Refinement: inspect and surface conflicts

    + +

    When both sides edit the same region, the merger produces a MergeConflict. The merged content carries Git-style markers, but the structured get_conflicts() output is what you want for a UI that lets the user pick a side.

    + + + + + + +

    Refinement: sync a Markdown folder against an edited DB copy

    + +

    A real-world scenario: posts live both in a Git-tracked Markdown folder and in WordPress, and someone edits each. Three-way-merge each post against its common ancestor.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/polyfill.html b/docs/reference/polyfill.html new file mode 100644 index 000000000..258fbe9c5 --- /dev/null +++ b/docs/reference/polyfill.html @@ -0,0 +1,215 @@ + + + + + +Polyfill — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    Polyfill

    + +

    PHP 8 string functions on PHP 7.2+, a working implementation of the WordPress hook API, and translation/escaping passthroughs so toolkit code runs without WordPress.

    + +
    composer require wp-php-toolkit/polyfill
    + + + +

    A lot of WordPress-adjacent code wants to call esc_html(), __(), or apply_filters() without booting WordPress. The polyfill component provides minimal but real implementations so that code runs unchanged outside WordPress, and stays out of the way when WordPress is loaded (every function uses function_exists() guards).

    + +

    A minimal example

    + +

    The polyfills define str_contains, str_starts_with, str_ends_with, and array_key_first only when missing.

    + + + + + + +

    Refinement: escaping and translation stubs

    + +

    Pass-through implementations let you write code that looks WordPressy and runs anywhere.

    + + + + + + +

    Refinement: a simple filter chain

    + +

    The hook system is a real implementation of the WordPress filter API: registered callbacks get applied in priority order, and each one transforms the running value.

    + + + + + + +

    Refinement: priority ordering and multi-arg passing

    + +

    Lower priority numbers run first. The fourth argument to add_filter controls how many arguments the callback receives: the filtered value itself plus any extra context values passed to apply_filters.

    + + + + + + +

    Refinement: hook-based extension points in standalone libraries

    + +

    Use do_action and apply_filters as cheap extension points in your own code, without depending on WordPress.

    + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/xml.html b/docs/reference/xml.html new file mode 100644 index 000000000..89a3bdfeb --- /dev/null +++ b/docs/reference/xml.html @@ -0,0 +1,230 @@ + + + + + +XML — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + +
    + +

    XML

    + +

    A streaming, namespace-aware XML processor in pure PHP. Read and modify huge feeds, WXR exports, ePub manifests, and Office Open XML parts without ever loading the document into memory and without depending on libxml2.

    + +
    composer require wp-php-toolkit/xml
    + +

    SimpleXMLElement and DOMDocument both need libxml2 and both build a complete in-memory tree. XMLProcessor walks the document forward as a cursor, keeps modifications in a side buffer, and emits the full updated XML with get_updated_xml() only when you ask for it.

    This design came from WordPress-scale documents such as WXR exports. A migration may only need to rewrite wp:attachment_url values or bump a feed attribute, so the processor optimizes for targeted cursor edits instead of a full validating XML stack.

    + +

    A minimal example

    + +

    Find each matching element, read its price, write a new one, and emit the updated document.

    + + + + + + +

    Refinement: read namespaced attributes from a WXR export

    + +

    WordPress's WXR commonly uses wp:, dc:, and content: prefixes bound to namespace names such as http://wordpress.org/export/1.2/. Pass that expanded namespace name, not the prefix; the processor handles whichever prefix the document actually uses.

    + + + + + + +

    Refinement: rewrite URLs across an entire WXR export

    + +

    Large WXR exports can hold many URLs, spread across several different elements as well as post content. Streaming the file lets you rewrite them without loading the whole XML document into memory.

    + + + + + + +

    Refinement: parse OPML to extract feed URLs

    + +

    OPML is the format Feedly and many readers use to import/export feed lists. Flat, attribute-heavy XML — exactly what a tag processor handles best.

    + + + + + + +

    Pitfalls

    + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/docs/reference/zip.html b/docs/reference/zip.html new file mode 100644 index 000000000..cffff164c --- /dev/null +++ b/docs/reference/zip.html @@ -0,0 +1,391 @@ + + + + + +Zip — PHP Toolkit reference + + + + + + + +
    + PHP Toolkit + +
    + +
    + + + +
    + +

    Zip

    + +

    Read and write ZIP archives in pure PHP — no libzip, no ZipArchive extension, no shelling out. The decoder is pull-based, so listing the central directory of a 2 GB archive costs roughly the size of the directory itself; the encoder writes one entry at a time into any byte sink.

    + +
    composer require wp-php-toolkit/zip
    + +

    Common PHP ZIP workflows assume ZipArchive is available. It often isn't: shared hosts disable it, WebAssembly runtimes don't have it, Playground's browser-side PHP can't shell out to libzip. The Zip component reimplements the readable subset of the format in pure PHP — Stored and Deflate compression, Zip64 for archives over 4 GB, central-directory parsing — and exposes it through three layers, ordered from highest to lowest level.

    + +

    The highest-level type is ZipFilesystem: an archive presented through the toolkit's Filesystem interface, so you call get_contents() and ls() the same way you would on a local directory or an in-memory tree. Below that, ZipDecoder and ZipEncoder stream individual entries when you care about the format itself — building an EPUB whose mimetype entry must be stored uncompressed and first, repacking an archive while modifying one file. At the bottom, FileEntry is the small struct that names a path, a compression method, and a body reader.

    + +

    A minimal example

    + +

    Wrap a ZIP file as a filesystem and read one entry. This is the shape every later refinement is built on.

    + + + + + + +

    Three things to notice. First, the encoder takes a ByteWriteStream sink — here a file, but it could equally be an HTTP response body, an in-memory buffer, or another component's input. Second, each entry's body is itself a stream — MemoryPipe here, but for large files you'd pass a FileReadStream and the encoder would stream the entry from disk without buffering it whole in memory. Third, reading is one line: ZipFilesystem::create() wraps the byte reader, parses the central directory, and gives you the standard Filesystem interface.

    + +

    Refinement: build an EPUB

    + +

    EPUB is a ZIP archive with one rule the spec is strict about: the mimetype entry must be the first entry in the file, and it must be stored uncompressed. Everything else is up to you. The Zip component lets you express that constraint directly:

    + + + + + + + +

    Refinement: stream a large entry

    + +

    Calling get_contents( 'data.csv' ) on a 500 MB CSV inside a ZIP would buffer 500 MB of inflated data into a single PHP string. open_read_stream() returns a pull-based reader instead, so you can process the entry in chunks:

    + + + + + + + +

    Refinement: repack an archive

    + +

    Modifying one entry in place is impossible at the format level — the central directory points at byte offsets, so changing any entry's compressed size invalidates everything after it. The pragmatic answer is repack: stream the source into a new archive, swapping the entry you want to change.

    + + + + + +

    Refinement: defend against zip-slip

    + +

    A malicious archive can name an entry ../../etc/passwd and trick a naive extractor into writing outside the destination. Run every entry path through ZipDecoder::sanitize_path() before extraction:

    + + + + + +

    Refinement: pipe entries into another filesystem

    + +

    The whole point of ZipFilesystem implementing the Filesystem interface is composition. copy_between_filesystems() walks one filesystem and writes its contents into another — so unpacking a ZIP into an in-memory tree, where you can validate or edit before committing to disk, is one helper call:

    + + + + + +

    When to use which type

    + + + + + + + + +
    Use | For
    ZipFilesystem::create() | Reading. You want get_contents(), ls(), is_dir() over a ZIP. The most common case.
    ZipEncoder | Writing. Stream entries into any ByteWriteStream sink. Required when format rules matter (EPUB, .docx).
    ZipDecoder | Low-level read access to the central directory and individual entry headers. Most code reaches for ZipFilesystem instead.
    open_read_stream() on a ZipFilesystem | Inflating a single large entry without buffering it whole in memory.
    copy_between_filesystems() | Moving entries from a ZIP into another filesystem (memory, local, SQLite).
    + +

    Pitfalls

    + + + + + + + + + +

    See also

    + + + +
    +
    + + + + diff --git a/package.json b/package.json index d340c9b20..74f02b551 100644 --- a/package.json +++ b/package.json @@ -21,5 +21,6 @@ "bugs": { "url": "https://github.com/WordPress/php-toolkit/issues" }, - "homepage": "https://github.com/WordPress/php-toolkit#readme" + "homepage": "https://github.com/WordPress/php-toolkit#readme", + "packageManager": "npm@11.6.2+sha512.ee22b335fcbc95662cdf3ab8a053daf045d9cf9c6df6040d28965abb707512b2c16fa6c5eec049d34c74f78f390cebd14f697919eadb97756564d4f9eccc4954" } diff --git a/phpcs.xml b/phpcs.xml index c20486124..02ed3994a 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -11,6 +11,7 @@ /plugins/url-updater/ /bin/build-phar /examples/ + /docs/ rector.php components/CORSProxy/cors-proxy-functions.php components/Markdown/bin/build/*