Add parser experiments catalog and parse-only benchmark harness

JanJakes · JanJakes · commit be95d4bf17ab · 2026-06-06T17:48:59.000+02:00
Consolidates the parser/lexer performance experiments explored alongside the shipped optimizations (PR #378, built on #373/#375/#376). One directory and commit per approach; each has code and/or a NOTES.md with idea, method, result, verdict.
diff --git a/experiments/README.md b/experiments/README.md
@@ -0,0 +1,54 @@
+# MySQL parser performance experiments
+
+This branch consolidates and verifies the parser/lexer performance experiments
+that were explored while optimizing the pure-PHP MySQL parser. The shipped
+optimizations live in PR #378 (built on #373 / #375 / #376); the optional native
+Rust extension is PR #381 (and #423). The work here is the catalog of *other*
+approaches that were prototyped and measured along the way — most lived only in
+throwaway local branches or ephemeral sessions and had no home until now.
+
+Everything was re-measured on a MacBook Pro M4, PHP 8.5.5, PCRE2 10.47.
+Numbers drift ~10–15% with thermal/load; treat them as orders of magnitude and
+ratios, not exact constants.
+
+## How to run
+Warm tracing JIT (the production-relevant config):
+```
+-d memory_limit=2G -d opcache.enable_cli=1 -d opcache.jit_buffer_size=64M -d opcache.jit=tracing
+```
+No opcache: `-d opcache.enable_cli=0`. opcache without JIT: `-d opcache.enable_cli=1 -d opcache.jit=disable`.
+Always put `-d` flags BEFORE the script path. The corpus is the 69,577-query
+MySQL server-suite CSV `mysql-server-tests-queries.csv` in `packages/mysql-on-sqlite/tests/mysql/data/`.
+
+Verified parse-only baselines (best-of-N, reuse one parser, warm JIT):
+trunk ≈ 27,700 QPS; the optimized parser (#378) ≈ 56,500 QPS (≈2.0×);
+pure-regex recognition ≈ 98K; the parser in validate-only mode ≈ 246K.
+AST construction is ≈77% of parse time.
+
+## Experiments (one per directory, one per commit)
+`_harness/` holds the parse-only benchmark harnesses used throughout. Each
+experiment directory has a `NOTES.md` with the idea, how it was measured, the
+result, and a verdict; see each for origin (PR or local branch).
+
+- `whole-grammar-compilation/` — compile every rule to a dedicated PHP method.
+- `method-size-capping/` — cap compiled method size, stub the rest to the interpreter.
+- `ast-data-structures/` — object vs validate-only vs flat-int-tape vs array node.
+- `pratt-expression-cascade/` — Pratt operator-precedence parser for the expr chain.
+- `ll2-selectors/` — 2-token-lookahead proposal + the rule/call-split analysis behind it.
+- `lalr-table-driven/` — kmyacc/nikic-style action-goto table interpreter.
+- `packed-table-lookups/` — pack/unpack vs PHP-array action-table lookups.
+- `full-pcre-recognizer/` — fold the whole grammar into one recursive PCRE pattern.
+- `regex-prevalidate-hybrid/` — regex yes/no gate in front of the AST parser.
+- `multishape-fast-parser/` — per-query-shape regex → direct AST construction.
+- `pcre2-capture-trace/` — extract a parse tree from PCRE2 captures.
+- `pcre2-callouts-ffi/` — PCRE2 callouts via FFI to emit a structural trace.
+- `preg-replace-callback-shiftreduce/` — iterative mega-pattern reduction.
+- `binary-bottomup-reduction/` — the same, with fixed-width binary encodings.
+- `oniguruma-capture-trees/` — `(?@...)` capture trees (31-group cap; unreachable in PHP).
+- `strtr-blind-reduction/` — strtr iterate-to-stable reduction (toy grammar).
+- `native-tree-builders/` — json_decode/unserialize/DOMDocument (circular).
+- `parle-extension/` — the `parle` PECL LALR(1) extension.
+- `other-php-parser-libs/` — PHP-PEG / Hoa\Compiler / Phlexy.
+- `sqlite-as-parser/` — use SQLite's own parser as a classifier.
+- `ast-cache/` — cache the AST on a parameterized token-stream signature.
+- `native-rust-extension/` — the optional Rust extension (PR #381/#423/#378).
diff --git a/experiments/_harness/bench-parse-only.php b/experiments/_harness/bench-parse-only.php
@@ -0,0 +1,175 @@
+<?php
+/**
+ * Parse-only benchmark methodology:
+ *   - Lex every query once, up front (lexer NOT part of timing).
+ *   - Time parse() only, best-of-N after warmup iterations.
+ *
+ * Points at an arbitrary src tree so trunk / performance / experiment
+ * branches can be measured with the identical harness:
+ *
+ *   php bench-parse-only.php --src=/abs/.../packages/mysql-on-sqlite/src \
+ *       [--data-dir=PATH] [--warmup=2] [--runs=5] [--limit=N] [--reuse] [--json]
+ *
+ * --data-dir  directory holding mysql-server-tests-queries.csv; defaults to
+ *             the "corpus" directory next to this script.
+ * --warmup    passes over the corpus whose timings are discarded.
+ * --runs      measured passes; values below 1 are raised to 1.
+ * --limit     stop loading the corpus after N non-empty queries.
+ * --reuse     reuse one parser via reset_tokens() (driver behaviour) instead
+ *             of constructing a fresh parser per query.
+ * --json      print a single JSON object instead of the text summary.
+ *
+ * "failures" counts the queries whose parse() returned null in the last
+ * measured run.
+ */
+
+// Turn every PHP warning/notice into an ErrorException so it aborts the run.
+set_error_handler(
+	function ( $severity, $message, $file, $line ) {
+		throw new ErrorException( $message, 0, $severity, $file, $line );
+	}
+);
+
+$src      = null;
+$data_dir = __DIR__ . '/corpus';
+$warmup   = 2;
+$runs     = 5;
+$limit    = PHP_INT_MAX;
+$reuse    = in_array( '--reuse', $argv, true );
+$json     = in_array( '--json', $argv, true );
+foreach ( $argv as $arg ) {
+	if ( preg_match( '/^--src=(.+)$/', $arg, $m ) ) {
+		$src = rtrim( $m[1], '/' );
+	}
+	if ( preg_match( '/^--data-dir=(.+)$/', $arg, $m ) ) {
+		$data_dir = rtrim( $m[1], '/' );
+	}
+	if ( preg_match( '/^--warmup=(\d+)$/', $arg, $m ) ) {
+		$warmup = (int) $m[1];
+	}
+	if ( preg_match( '/^--runs=(\d+)$/', $arg, $m ) ) {
+		// best/median need at least one sample, so never measure zero runs.
+		$runs = max( 1, (int) $m[1] );
+	}
+	if ( preg_match( '/^--limit=(\d+)$/', $arg, $m ) ) {
+		$limit = (int) $m[1];
+	}
+}
+if ( null === $src ) {
+	fwrite( STDERR, "Missing --src=PATH\n" );
+	exit( 1 );
+}
+$corpus_file = "$data_dir/mysql-server-tests-queries.csv";
+if ( ! is_readable( $corpus_file ) ) {
+	fwrite( STDERR, "Corpus not readable: $corpus_file\n" );
+	exit( 1 );
+}
+
+require_once "$src/parser/class-wp-parser-grammar.php";
+require_once "$src/parser/class-wp-parser-node.php";
+require_once "$src/parser/class-wp-parser-token.php";
+require_once "$src/parser/class-wp-parser.php";
+require_once "$src/mysql/class-wp-mysql-token.php";
+require_once "$src/mysql/class-wp-mysql-lexer.php";
+require_once "$src/mysql/class-wp-mysql-parser.php";
+
+$grammar_data = include "$src/mysql/mysql-grammar.php";
+$grammar      = new WP_Parser_Grammar( $grammar_data );
+
+// Corpus loading identical to run-parser-benchmark.php (no header skip; drop
+// null AND empty records).
+$handle  = fopen( $corpus_file, 'r' );
+$queries = array();
+while ( ( $record = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	$query = $record[0] ?? null;
+	if ( null === $query || '' === $query ) {
+		continue;
+	}
+	$queries[] = $query;
+	if ( count( $queries ) >= $limit ) {
+		break;
+	}
+}
+fclose( $handle );
+
+// Pre-lex all queries (excluded from timing).
+$all_tokens = array();
+foreach ( $queries as $query ) {
+	$lexer        = new WP_MySQL_Lexer( $query );
+	$all_tokens[] = $lexer instanceof WP_MySQL_Native_Lexer
+		? $lexer->native_token_stream()
+		: $lexer->remaining_tokens();
+}
+$n = count( $queries );
+
+// One timed pass over the pre-lexed corpus; returns array( seconds, failures ).
+$run_once = function () use ( $grammar, $all_tokens, $reuse ) {
+	$failures = 0;
+	$parser   = null;
+	$start    = microtime( true );
+	foreach ( $all_tokens as $tokens ) {
+		if ( $reuse ) {
+			if ( null === $parser ) {
+				$parser = new WP_MySQL_Parser( $grammar, $tokens );
+			} else {
+				$parser->reset_tokens( $tokens );
+			}
+		} else {
+			$parser = new WP_MySQL_Parser( $grammar, $tokens );
+		}
+		$ast = $parser->parse();
+		if ( null === $ast ) {
+			++$failures;
+		}
+	}
+	return array( microtime( true ) - $start, $failures );
+};
+
+// Warmup passes: results are discarded.
+for ( $i = 0; $i < $warmup; $i++ ) {
+	$run_once();
+}
+
+$qpss = array();
+$fail = 0;
+for ( $r = 0; $r < $runs; $r++ ) {
+	list( $duration, $failures ) = $run_once();
+	$qpss[] = $n / $duration;
+	$fail   = $failures;
+}
+sort( $qpss );
+$best   = $qpss[ count( $qpss ) - 1 ];
+// With an even number of runs this picks the upper of the two middle values.
+$median = $qpss[ intdiv( count( $qpss ), 2 ) ];
+
+// opcache_get_status() is undefined when the opcache extension is not loaded;
+// guard the call so the results gathered above are still printed (jit=off).
+$jit_on = false;
+$status = function_exists( 'opcache_get_status' ) ? opcache_get_status( false ) : false;
+if ( is_array( $status ) && isset( $status['jit']['on'] ) ) {
+	$jit_on = (bool) $status['jit']['on'];
+}
+
+if ( $json ) {
+	echo json_encode(
+		array(
+			'queries'  => $n,
+			'failures' => $fail,
+			'qps_best' => $best,
+			'qps_med'  => $median,
+			'jit'      => $jit_on,
+			'php'      => PHP_VERSION,
+		)
+	), "\n";
+	exit;
+}
+
+printf(
+	"queries=%d failures=%d  best=%d QPS  median=%d QPS  jit=%s php=%s\n",
+	$n,
+	$fail,
+	$best,
+	$median,
+	$jit_on ? 'on' : 'off',
+	PHP_VERSION
+);
diff --git a/experiments/_harness/bench-parser-split.php b/experiments/_harness/bench-parser-split.php
@@ -0,0 +1,113 @@
+<?php
+/**
+ * Parser performance benchmark with split timings.
+ *
+ * Separates lex time from parse time by pre-tokenizing all queries before
+ * starting the parse-only timer. Prints the lex time and QPS, then the parse
+ * time, QPS and failure count of each run and, when more than one run was
+ * requested, the best and average parse time.
+ *
+ * Usage:
+ *   php bench-parser-split.php [--runs=N] [--limit=M] [--src=PATH] [--data-dir=PATH]
+ *
+ * --src       source tree containing parser/ and mysql/; defaults to
+ *             ../../src relative to this script.
+ * --data-dir  directory containing mysql-server-tests-queries.csv; defaults
+ *             to ../mysql/data relative to this script.
+ */
+
+set_error_handler(
+	function ( $severity, $message, $file, $line ) {
+		throw new ErrorException( $message, 0, $severity, $file, $line );
+	}
+);
+
+// Defaults keep the original relative layout; override them when the script
+// lives elsewhere in the tree.
+$runs     = 1;
+$limit    = PHP_INT_MAX;
+$src      = __DIR__ . '/../../src';
+$data_dir = __DIR__ . '/../mysql/data';
+foreach ( $argv as $arg ) {
+	if ( preg_match( '/^--runs=(\d+)$/', $arg, $m ) ) {
+		$runs = (int) $m[1];
+	}
+	if ( preg_match( '/^--limit=(\d+)$/', $arg, $m ) ) {
+		$limit = (int) $m[1];
+	}
+	if ( preg_match( '/^--src=(.+)$/', $arg, $m ) ) {
+		$src = rtrim( $m[1], '/' );
+	}
+	if ( preg_match( '/^--data-dir=(.+)$/', $arg, $m ) ) {
+		$data_dir = rtrim( $m[1], '/' );
+	}
+}
+
+require_once "$src/parser/class-wp-parser-grammar.php";
+require_once "$src/parser/class-wp-parser-node.php";
+require_once "$src/parser/class-wp-parser-token.php";
+require_once "$src/parser/class-wp-parser.php";
+require_once "$src/mysql/class-wp-mysql-token.php";
+require_once "$src/mysql/class-wp-mysql-lexer.php";
+require_once "$src/mysql/class-wp-mysql-parser.php";
+
+$grammar_data = include "$src/mysql/mysql-grammar.php";
+$grammar      = new WP_Parser_Grammar( $grammar_data );
+
+// Load the corpus. The first CSV record is skipped as a header, and records
+// whose first column is null are dropped.
+$handle  = fopen( "$data_dir/mysql-server-tests-queries.csv", 'r' );
+$queries = array();
+$header  = true;
+while ( ( $record = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( $header ) {
+		$header = false;
+		continue;
+	}
+	if ( null !== $record[0] ) {
+		$queries[] = $record[0];
+	}
+	if ( count( $queries ) >= $limit ) {
+		break;
+	}
+}
+fclose( $handle );
+echo 'Loaded ', count( $queries ), " queries\n";
+
+// Pre-tokenize all queries once, timing the lexer on its own. The same token
+// arrays are then reused by every parse run below.
+$lex_start  = microtime( true );
+$all_tokens = array();
+foreach ( $queries as $query ) {
+	$lexer        = new WP_MySQL_Lexer( $query );
+	$all_tokens[] = $lexer->remaining_tokens();
+}
+$lex_duration = microtime( true ) - $lex_start;
+printf( "Lex: %.4fs, %d QPS\n", $lex_duration, count( $queries ) / $lex_duration );
+
+// Parse benchmark: a fresh parser is constructed for every query.
+$results = array();
+for ( $r = 0; $r < $runs; $r++ ) {
+	$failures = 0;
+	$start    = microtime( true );
+	foreach ( $all_tokens as $tokens ) {
+		$parser = new WP_MySQL_Parser( $grammar, $tokens );
+		$ast    = $parser->parse();
+		if ( null === $ast ) {
+			++$failures;
+		}
+	}
+	$duration  = microtime( true ) - $start;
+	$qps       = count( $queries ) / $duration;
+	$results[] = array( $duration, $qps, $failures );
+	printf( "Run %d: %.4fs, %d QPS, %d failures\n", $r + 1, $duration, $qps, $failures );
+}
+
+if ( $runs > 1 ) {
+	$durations = array_column( $results, 0 );
+	sort( $durations );
+	$best = $durations[0];
+	printf( "Best: %.4fs, %d QPS\n", $best, count( $queries ) / $best );
+	$avg = array_sum( $durations ) / count( $durations );
+	printf( "Avg:  %.4fs, %d QPS\n", $avg, count( $queries ) / $avg );
+}