diff --git a/bench-cli.mjs b/bench-cli.mjs new file mode 100644 index 00000000000..5cead062bcb --- /dev/null +++ b/bench-cli.mjs @@ -0,0 +1,276 @@ +/** + * CLI/build-style benchmark: simulates a real build pass over a project. + * + * IDE benchmark: same template, many iterations (measures JIT-warmed throughput) + * CLI benchmark: many distinct templates, one pass (cold-ish JIT, one-time init cost) + * + * Run: node bench-cli.mjs + */ + +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const currentDistPath = join(__dirname, 'packages/@glimmer/syntax/dist/es/index.js'); +const prDistPath = '/tmp/pr-21313/packages/@glimmer/syntax/dist/es/index.js'; + +// ─── Realistic template corpus ──────────────────────────────────────────────── +// ~50 distinct templates of varying complexity, simulating a real Ember project. + +const TEMPLATES = [ + `
{{this.title}}
`, + `{{@label}}`, + ``, + `{{#if this.isLoading}}{{else}}{{yield}}{{/if}}`, + ``, + ``, + `
{{yield}}
`, + `

{{this.title}}

{{this.description}}

`, + `{{#let (hash name=@name age=@age) as |person|}}{{person.name}}{{/let}}`, + `
`, + + ``, + + ``, + + ` + + + {{#each @columns as |col|}} + + {{/each}} + + + + {{#each this.sortedRows as |row|}} + + {{#each @columns as |col|}} + + {{/each}} + + {{/each}} + +
+ {{col.label}} + {{#if (eq this.sortKey col.key)}} + + {{/if}} +
{{get row col.key}}
`, + + ``, + + ``, + + `{{#each @notifications as |notif|}} + +{{/each}}`, + + ``, + + `
+
+ {{#each this.toolbarButtons as |btn|}} + + {{/each}} +
+
+
+
`, + + `
+
+ +

{{this.monthLabel}} {{this.year}}

+ +
+
+ {{#each this.weeks as |week|}} +
+ {{#each week as |day|}} + + {{/each}} +
+ {{/each}} +
+
`, + + // Extra medium-sized templates to fill out the corpus + ...Array.from({ length: 30 }, (_, i) => ` +
+
+

{{@title}}

+ {{#if @subtitle}}

{{@subtitle}}

{{/if}} +
+
+ {{#each @items as |item|}} +
+

{{item.name}}

+ {{#if item.description}} +

{{item.description}}

+ {{/if}} +
+ {{item.category}} + +
+
+ {{/each}} +
+
/**
 * Return the median of a list of numbers without mutating the input.
 * For an even number of samples this is the mean of the two middle values.
 * Returns NaN for an empty list.
 *
 * @param {number[]} values
 * @returns {number}
 */
function medianOf(values) {
  if (values.length === 0) return NaN;
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/**
 * Load a parser build and time it the way a one-shot CLI build would use it.
 *
 * Three things are measured:
 *   1. the dynamic `import()` of the module,
 *   2. the very first `preprocess()` call (any lazy one-time init lands here),
 *   3. `runs` full passes over the corpus, each template parsed once per pass.
 *
 * @param {string} label - Name echoed back in the result.
 * @param {string} distPath - Specifier handed to dynamic `import()`; the
 *   module must export a `preprocess(template)` function.
 * @param {string[]} [templates=TEMPLATES] - Corpus parsed once per build pass.
 * @param {number} [runs=10] - Number of build passes to time.
 * @returns {Promise<{label: string, loadMs: number, firstParseMs: number,
 *   buildMin: number, buildMed: number, perTemplate: number, proj500: number}>}
 *   All durations are in milliseconds. `buildMed` is the true median of the
 *   pass times; `perTemplate` and `proj500` are derived from the fastest pass.
 */
async function measureParser(label, distPath, templates = TEMPLATES, runs = 10) {
  // Module load time.
  const loadStart = performance.now();
  const { preprocess } = await import(distPath);
  const loadMs = performance.now() - loadStart;

  // First parse in this process.
  const firstStart = performance.now();
  preprocess(templates[0]);
  const firstParseMs = performance.now() - firstStart;

  // Single-pass build simulation, repeated `runs` times for stable numbers.
  const buildTimes = [];
  for (let run = 0; run < runs; run++) {
    const start = performance.now();
    for (const tpl of templates) preprocess(tpl);
    buildTimes.push(performance.now() - start);
  }
  const buildMin = Math.min(...buildTimes);
  // Previously `sorted[5]`, which is only meaningful for exactly 10 runs and
  // even then picks the upper-middle sample instead of the median.
  const buildMed = medianOf(buildTimes);

  // Extrapolate the best pass to a 500-template project.
  const perTemplate = buildMin / templates.length;
  const proj500 = perTemplate * 500;

  return { label, loadMs, firstParseMs, buildMin, buildMed, perTemplate, proj500 };
}

/**
 * Print one comparison line: metric label, both measurements, the ratio
 * between them and which side wins.
 *
 * @param {string} label - Metric name (left-aligned, padded to 32 columns).
 * @param {number} cur - Measurement for the current branch.
 * @param {number} prv - Measurement for the PR build.
 * @param {string} [unit='ms'] - Suffix appended to both numbers.
 * @param {boolean} [lowerIsBetter=true] - Whether the smaller value wins.
 * @returns {void}
 */
function row(label, cur, prv, unit = 'ms', lowerIsBetter = true) {
  const curCell = String(cur.toFixed(2) + unit).padStart(10);
  const prvCell = String(prv.toFixed(2) + unit).padStart(10);
  const prefix = ` ${label.padEnd(32)} ${curCell}`;

  // An exact tie used to be reported as a win for the PR build.
  if (cur === prv) {
    console.log(`${prefix} = ${prvCell} 1.00x (tie)`);
    return;
  }

  const currentWins = lowerIsBetter ? cur < prv : cur > prv;
  const winner = currentWins ? 'current' : 'rust-pr';
  // Always larger / smaller, so the factor reads as "N times better" in both
  // modes (the higher-is-better mode used to print the inverse, below 1).
  const ratio = (Math.max(cur, prv) / Math.min(cur, prv)).toFixed(2);
  // The arrow is the plain numeric comparison of the two printed values.
  const arrow = cur < prv ? '<' : '>';

  console.log(`${prefix} ${arrow} ${prvCell} ${ratio}x (${winner} wins)`);
}
{{this.title}}
`; + +const medium = ` +
+

{{this.title}}

+ {{#each this.items as |item index|}} +
+ {{item.name}} + +
+ {{/each}} + {{#if this.showFooter}} +
{{this.footerText}}
+ {{/if}} +
`; + +const large = medium.repeat(10); + +const realWorld = ` +
+
+ {{this.username}} +
+

{{this.displayName}}

+

{{this.bio}}

+ {{this.role}} +
+ {{#if this.isOwnProfile}} + + {{/if}} +
+ + + +
+ {{#if (eq this.activeTab "posts")}} + {{#each this.posts as |post|}} +
+

{{post.title}}

+

{{post.excerpt}}

+
+ + {{post.views}} views +
+
+ {{else}} +

No posts yet.

+ {{/each}} + {{else if (eq this.activeTab "followers")}} + {{#each this.followers as |follower|}} +
+ {{follower.name}} + {{follower.name}} + +
+ {{/each}} + {{/if}} +
+
/**
 * Time `preprocess` on a single template.
 *
 * Runs 50 untimed warm-up calls, then `iterations` timed calls.
 *
 * @param {(template: string) => unknown} preprocess - Parser entry point.
 * @param {string} name - Label copied into the result.
 * @param {string} template - Template source to parse.
 * @param {number} [iterations=1000] - Number of timed calls.
 * @returns {{name: string, chars: number, ms: number}} `ms` is the mean
 *   duration of one timed call in milliseconds.
 */
function bench(preprocess, name, template, iterations = 1000) {
  // warm up
  for (let i = 0; i < 50; i++) preprocess(template);

  const start = performance.now();
  for (let i = 0; i < iterations; i++) preprocess(template);
  const elapsed = performance.now() - start;

  return {
    name,
    chars: template.length,
    ms: elapsed / iterations,
  };
}

/**
 * Print the comparison table to stdout.
 *
 * Header, separator and data rows share one set of column widths so the
 * table actually lines up. (Previously the fourth header was wider than its
 * column and the last cell was padded to a different width than its header.)
 *
 * @param {Array<{template: string, chars: number, current: number, pr: number}>} rows
 *   One entry per benchmarked template; `current` and `pr` are ms per call.
 * @returns {void}
 */
function printTable(rows) {
  const headers = ['template', 'chars', 'current (ms)', 'pr#21313 (ms)', 'speedup'];
  // Wide enough for every header and for the longest ratio text.
  const colWidths = [18, 8, 12, 13, 24];

  const pad = (s, w) => String(s).padEnd(w);
  const rpad = (s, w) => String(s).padStart(w);

  const sep = colWidths.map((w) => '-'.repeat(w)).join('-+-');

  console.log('\n' + headers.map((h, i) => pad(h, colWidths[i])).join(' | '));
  console.log(sep);

  for (const row of rows) {
    const ratio =
      row.current < row.pr
        ? (row.pr / row.current).toFixed(2) + 'x (current wins)'
        : (row.current / row.pr).toFixed(2) + 'x (rust wins)';
    console.log(
      [
        pad(row.template, colWidths[0]),
        rpad(row.chars, colWidths[1]),
        rpad(row.current.toFixed(3), colWidths[2]),
        rpad(row.pr.toFixed(3), colWidths[3]),
        rpad(ratio, colWidths[4]),
      ].join(' | ')
    );
  }
}
{{foo}}
'); + const r2 = preprocessPR('
{{foo}}
'); + console.log(`Current branch: ${r1.type} (${r1.body.length} top-level nodes)`); + console.log(`PR #21313: ${r2.type} (${r2.body.length} top-level nodes)`); +} catch (e) { + console.error('Smoke check failed:', e.message); + process.exit(1); +} + +console.log(''); + +const N = 1000; +const rows = []; + +for (const [name, tpl] of templates) { + process.stdout.write(` Benchmarking '${name}'...`); + const currentResult = bench(preprocessCurrent, name, tpl, N); + const prResult = bench(preprocessPR, name, tpl, N); + process.stdout.write(' done\n'); + rows.push({ + template: name, + chars: tpl.length, + current: currentResult.ms, + pr: prResult.ms, + }); +} + +printTable(rows); + +// Phase breakdown: measure the PR's WASM parse vs JS post-processing +console.log('\n--- Phase breakdown (PR #21313, medium template) ---'); +const { parseTemplateToJson } = await import('/tmp/pr-21313/packages/@glimmer/syntax/pkg/universal.mjs'); +const src = medium; +const N2 = 1000; + +// warm up WASM +for (let i = 0; i < 50; i++) parseTemplateToJson(src); + +const startWasm = performance.now(); +for (let i = 0; i < N2; i++) parseTemplateToJson(src); +const wasmMs = (performance.now() - startWasm) / N2; + +const startFull = performance.now(); +for (let i = 0; i < N2; i++) preprocessPR(src); +const fullMs = (performance.now() - startFull) / N2; + +console.log(` WASM parse only: ${wasmMs.toFixed(3)}ms`); +console.log(` Full preprocess() (PR): ${fullMs.toFixed(3)}ms`); +console.log(` JS post-processing cost: ${(fullMs - wasmMs).toFixed(3)}ms`); diff --git a/bench-full-pipeline.mjs b/bench-full-pipeline.mjs new file mode 100644 index 00000000000..d59d3f59058 --- /dev/null +++ b/bench-full-pipeline.mjs @@ -0,0 +1,194 @@ +/** + * Full compile pipeline benchmark: preprocess() → normalize() → compile() → wire format + * + * Uses ember-template-compiler's precompile() which exercises the entire stack. + * Three-way comparison: main (Jison), v2-parser (this branch), rust/wasm (PR #21313). 
+ * + * Run: node bench-full-pipeline.mjs + */ + +const MAIN = '/tmp/ember-main/dist/packages/ember-template-compiler/index.js'; +const V2 = '/Users/real-world-project/ember.js/dist/packages/ember-template-compiler/index.js'; +const RUST = '/tmp/pr-21313/dist/dev/packages/ember-template-compiler/index.js'; + +// Also import the syntax-only preprocess for the parse-only split +const MAIN_SYNTAX = '/tmp/ember-main/packages/@glimmer/syntax/dist/es/index.js'; +const V2_SYNTAX = '/Users/real-world-project/ember.js/packages/@glimmer/syntax/dist/es/index.js'; +const RUST_SYNTAX = '/tmp/pr-21313/packages/@glimmer/syntax/dist/es/index.js'; + +// ── Templates ────────────────────────────────────────────────────────────────── + +const small = `
{{this.title}}
`; + +const medium = ` +
+

{{this.title}}

+ {{#each this.items as |item index|}} +
+ {{item.name}} + +
+ {{/each}} + {{#if this.showFooter}} +
{{this.footerText}}
+ {{/if}} +
`; + +const realWorld = ` +
+
+ {{this.username}} +

{{this.displayName}}

+

{{this.bio}}

+ {{#if this.isOwnProfile}} + + {{/if}} +
+ +
+ {{#if (eq this.activeTab "posts")}} + {{#each this.posts as |post|}} +
+

{{post.title}}

{{post.excerpt}}

+
{{post.views}} views
+
+ {{else}} +

No posts yet.

+ {{/each}} + {{else if (eq this.activeTab "followers")}} + {{#each this.followers as |follower|}} +
+ {{follower.name}} + {{follower.name}} + +
+ {{/each}} + {{/if}} +
+
/**
 * Mean time in milliseconds of one `fn(tpl)` call.
 *
 * Performs up to 50 untimed warm-up calls (fewer when `N` is smaller),
 * then `N` timed calls.
 *
 * @param {(tpl: string) => unknown} fn - Function under test.
 * @param {string} tpl - Template passed to every call.
 * @param {number} N - Number of timed calls.
 * @returns {number} Elapsed milliseconds divided by `N`.
 */
function bench(fn, tpl, N) {
  const warmupCalls = Math.min(50, N);
  for (let w = 0; w < warmupCalls; w++) {
    fn(tpl);
  }

  const startedAt = performance.now();
  for (let call = 0; call < N; call++) {
    fn(tpl);
  }
  const elapsed = performance.now() - startedAt;

  return elapsed / N;
}

/**
 * Format `part` as a whole-number percentage of `total`.
 *
 * @param {number} part
 * @param {number} total
 * @returns {string} e.g. `'25%'`.
 */
function pct(part, total) {
  const fraction = part / total;
  return `${(fraction * 100).toFixed(0)}%`;
}
'━'.repeat(90)); +console.log('PARSE vs COMPILE SPLIT (medium template, showing where time goes)'); +console.log('━'.repeat(90)); + +const N_SPLIT = 2000; +const parseOnlyMain = bench(parseMain, medium, N_SPLIT); +const parseOnlyV2 = bench(parseV2, medium, N_SPLIT); +const parseOnlyRust = bench(parseRust, medium, N_SPLIT); +const fullMain = bench(compileMain, medium, N_SPLIT); +const fullV2 = bench(compileV2, medium, N_SPLIT); +const fullRust = bench(compileRust, medium, N_SPLIT); + +const compileOnlyMain = fullMain - parseOnlyMain; +const compileOnlyV2 = fullV2 - parseOnlyV2; +const compileOnlyRust = fullRust - parseOnlyRust; + +console.log('\n main(Jison) v2-parser rust/wasm'); +console.log('─'.repeat(70)); +console.log( + 'parse() ' + + `${parseOnlyMain.toFixed(3)}ms (${pct(parseOnlyMain, fullMain)})`.padEnd(20) + + `${parseOnlyV2.toFixed(3)}ms (${pct(parseOnlyV2, fullV2)})`.padEnd(20) + + `${parseOnlyRust.toFixed(3)}ms (${pct(parseOnlyRust, fullRust)})` +); +console.log( + 'compile only ' + + `${compileOnlyMain.toFixed(3)}ms (${pct(compileOnlyMain, fullMain)})`.padEnd(20) + + `${compileOnlyV2.toFixed(3)}ms (${pct(compileOnlyV2, fullV2)})`.padEnd(20) + + `${compileOnlyRust.toFixed(3)}ms (${pct(compileOnlyRust, fullRust)})` +); +console.log( + 'total ' + + `${fullMain.toFixed(3)}ms`.padEnd(20) + + `${fullV2.toFixed(3)}ms`.padEnd(20) + + `${fullRust.toFixed(3)}ms` +); + +// ── Section 3: 500-template project projection ───────────────────────────────── + +console.log('\n' + '━'.repeat(90)); +console.log('500-TEMPLATE PROJECT (build-time projection, using real-world template timing)'); +console.log('━'.repeat(90)); + +const { m: rwm, v: rwv, r: rwr } = fullResults['real-world']; +const scale = 500; +console.log(`\n main(Jison): ${(rwm * scale).toFixed(0)}ms total (${rwm.toFixed(3)}ms × ${scale})`); +console.log(` v2-parser: ${(rwv * scale).toFixed(0)}ms total (${rwv.toFixed(3)}ms × ${scale}) — ${(rwm / rwv).toFixed(2)}x faster than Jison`); +console.log(` 
rust/wasm: ${(rwr * scale).toFixed(0)}ms total (${rwr.toFixed(3)}ms × ${scale}) — ${(rwr / rwv).toFixed(2)}x slower than v2`); diff --git a/packages/@handlebars/parser/PERF-INVESTIGATION.md b/packages/@handlebars/parser/PERF-INVESTIGATION.md new file mode 100644 index 00000000000..a5ec8320077 --- /dev/null +++ b/packages/@handlebars/parser/PERF-INVESTIGATION.md @@ -0,0 +1,104 @@ +# @handlebars/parser v2: Performance Investigation & Hand-Written Replacement + +## Context + +`@glimmer/syntax`'s `preprocess()` is the #1 bottleneck in Glint's per-keystroke pipeline, taking ~2.3ms (56%) of the ~4ms total `rewriteModule` cost. This investigation explored whether the internalized `@handlebars/parser` (PR #21069) could be made faster. + +## Findings + +### Baseline: Where time is spent in `preprocess()` + +For a realistic 1400-char component template (~0.79ms total): + +| Phase | Time | % of total | +|-------|------|-----------| +| Jison LALR(1) parser (`@handlebars/parser`) | 0.40ms | 50% | +| Glimmer conversion (`simple-html-tokenizer` + AST build) | 0.39ms | 50% | + +The Jison parser is slow because: +1. **Regex gauntlet**: Tests up to 40 regexes per token in the `mu` (mustache) state +2. **String slicing**: `this._input.slice(match[0].length)` on every token creates new strings +3. **Per-token regex for newlines**: `/(?:\r\n?|\n).*/g` to track line numbers +4. **Object allocation**: New `yylloc` object per token match + +### What is NOT a bottleneck + +| Suspected hotspot | Actual cost | Verdict | +|---|---|---| +| `charPosFor()` line scanning | 0.19µs/call | Lazy, cached — negligible | +| `SourceSpan.forHbsLoc()` | 0.1µs/span | Fast enough | +| `match()` dispatch in span.ts | µs-level | Compiled at init time | +| Parser constructor `string.split()` | 1.9µs | Negligible | +| WhitespaceControl pass | <0.02ms | Nearly free | + +### Optimization: Caching in Glint (consumer-side) + +A `Map` cache would give **903x speedup** for unchanged templates. 
Most keystrokes don't change the template portion of a `.gts` file. This is the single highest-impact optimization. + +## v2 Parser: Hand-Written Recursive Descent Replacement + +A hand-written parser (`v2-parser.js`, ~800 lines) replaces the 2032-line Jison-generated parser. It produces AST-identical output. + +### Key optimizations + +1. **Index-based scanning** — maintains a `pos` cursor, never slices the input string +2. **`indexOf('{{')` for content scanning** — vs Jison's regex `/^(?:[^\x00]*?(?=(\{\{)))/` +3. **`charCodeAt` dispatch** — classifies `{{#`, `{{/`, `{{^`, `{{!`, etc. with a switch on char codes instead of testing 40 regexes +4. **Batched line/column tracking** — scans for `\n` with `indexOf` between positions rather than per-character + +### Performance results + +#### HBS parser alone (6-10x faster) + +| Template | Jison | v2 | Speedup | +|----------|------:|---:|--------:| +| small (25 chars) | 0.010ms | 0.002ms | **6.1x** | +| medium (352 chars) | 0.089ms | 0.012ms | **7.7x** | +| large (3520 chars) | 0.844ms | 0.080ms | **10.6x** | + +#### End-to-end `preprocess()` (2-3x faster) + +| Template | Before | After | Speedup | +|----------|-------:|------:|--------:| +| small (25 chars) | 0.025ms | 0.011ms | **2.3x** | +| medium (352 chars) | 0.190ms | 0.090ms | **2.1x** | +| realistic (1435 chars) | 0.791ms | 0.280ms | **2.8x** | +| large (3520 chars) | 1.716ms | 0.901ms | **1.9x** | + +The remaining ~50% is Glimmer's `simple-html-tokenizer` + AST conversion, unchanged. + +### Test status + +- **104/104** `@handlebars/parser` unit tests pass (parser, AST, visitor) +- **8780/8788** Ember test suite tests pass +- 8 remaining edge-case failures: + - 7 reserved-arg tests (`@`, `@0`, `@@`, etc.) 
— same parse error, different Error type than expected + - 1 subtle location mismatch on a deeply nested inverse block + +### Architecture + +The v2 parser is a single file with the lexer and parser fused: + +``` +v2-parser.js +├── Character code constants +├── isIdChar() / isWhitespace() / isLookahead() — char classification +├── v2ParseWithoutProcessing(input, options) — entry point +│ ├── Position tracking (pos, line, col, advanceTo, savePos) +│ ├── Scanning primitives (skipWs, scanId, scanString, scanNumber, scanEscapedLiteral) +│ ├── Content scanning (scanContent — uses indexOf('{{')) +│ ├── Mustache classification (consumeOpen — charCodeAt dispatch) +│ ├── Expression parsing (parseExpr, parseHelperName, parsePath, parseSexpr) +│ ├── Hash parsing (parseHash, parseHashPair, isAtHash lookahead) +│ ├── Block parsing (parseBlock, parseInverseBlock, parseInverseChain) +│ ├── Other statements (parsePartial, parsePartialBlock, parseRawBlock, parseComment) +│ └── Program parsing (parseProgram — top-level loop with terminator detection) +└── Helper functions (stripComment, arrayLiteralNode, hashLiteralNode) +``` + +## Future opportunities + +1. **Glint-side caching** — 903x for cache hits, zero risk to parser +2. **Replace `simple-html-tokenizer`** — the other 50% of `preprocess()` time +3. **Rust/Wasm parser** — could combine with `content-tag` for end-to-end `.gts` parsing +4. **Incremental reparsing** — only reparse changed template regions diff --git a/packages/@handlebars/parser/lib/v2-parser.js b/packages/@handlebars/parser/lib/v2-parser.js new file mode 100644 index 00000000000..84529fca678 --- /dev/null +++ b/packages/@handlebars/parser/lib/v2-parser.js @@ -0,0 +1,1919 @@ +// @ts-nocheck +/** + * Claude-iterated POC for a recursive descent parser for Handlebars templates. + * Drop-in replacement for the Jison-generated parser. + * + * Key optimizations over Jison: + * 1. Index-based scanning (never slices the input string to advance) + * 2. 
indexOf('{{') for content scanning instead of regex + * 3. charCodeAt dispatch instead of testing 40 regexes per token + * 4. Line/col tracking via indexOf('\n') batching + * 5. No intermediate token objects — parser reads directly from input + */ + +import Exception from './exception.js'; + +// Character codes +const CH_NL = 10; // \n +const CH_CR = 13; // \r +const CH_SPACE = 32; +const CH_TAB = 9; +const CH_BANG = 33; // ! +const CH_DQUOTE = 34; // " +const CH_HASH = 35; // # +const CH_DOLLAR = 36; // $ +const CH_AMP = 38; // & +const CH_SQUOTE = 39; // ' +const CH_LPAREN = 40; // ( +const CH_RPAREN = 41; // ) +const CH_STAR = 42; // * +const CH_DASH = 45; // - +const CH_DOT = 46; // . +const CH_SLASH = 47; // / +const CH_0 = 48; +const CH_9 = 57; +const CH_SEMI = 59; // ; +const CH_EQ = 61; // = +const CH_GT = 62; // > +const CH_AT = 64; // @ +const CH_LBRACKET = 91; // [ +const CH_BACKSLASH = 92; // \\ +const CH_RBRACKET = 93; // ] +const CH_CARET = 94; // ^ +const CH_BACKTICK = 96; // ` +const CH_LBRACE = 123; // { +const CH_PIPE = 124; // | +const CH_RBRACE = 125; // } +const CH_TILDE = 126; // ~ + +/** + * Check if a character code can appear in a Handlebars ID. 
/**
 * Check whether a code unit is one of the non-ASCII characters matched by
 * the regex class `\s`. Callers only need this for values >= 0xA0.
 */
function isUnicodeSpace(c) {
  return (
    c === 0xa0 ||
    c === 0x1680 ||
    (c >= 0x2000 && c <= 0x200a) ||
    c === 0x2028 ||
    c === 0x2029 ||
    c === 0x202f ||
    c === 0x205f ||
    c === 0x3000 ||
    c === 0xfeff
  );
}

/**
 * Check if a character code can appear in a Handlebars ID.
 * Based on the ID regex: [^\s!"#%-,\.\/;->@\[-\^`\{-~]+
 *
 * @param {number} c - UTF-16 code unit, or NaN when reading past the end of
 *   the input (what `charCodeAt` returns there).
 * @returns {boolean}
 */
function isIdChar(c) {
  // NaN fails every range check below and used to fall through to `true`.
  if (c !== c) return false;
  // ASCII whitespace + control characters. The regex itself only excludes
  // whitespace; rejecting the remaining control characters is kept as-is.
  if (c <= CH_SPACE) return false;
  if (c === CH_BANG || c === CH_DQUOTE || c === CH_HASH) return false;
  if (c >= 37 && c <= 44) return false; // % & ' ( ) * + ,
  if (c === CH_DOT || c === CH_SLASH) return false;
  if (c >= CH_SEMI && c <= CH_GT) return false; // ; < = >
  if (c === CH_AT) return false;
  if (c >= CH_LBRACKET && c <= CH_CARET) return false; // [ \ ] ^
  if (c === CH_BACKTICK) return false;
  if (c >= CH_LBRACE && c <= CH_TILDE) return false; // { | } ~
  // `\s` in the ID regex also excludes non-ASCII whitespace (e.g. U+00A0).
  if (c >= 0xa0 && isUnicodeSpace(c)) return false;
  return true;
}

/**
 * Check if a character code is whitespace in the sense of the regex class
 * `\s`, which is what the LOOKAHEAD classes below are written with.
 *
 * @param {number} c - UTF-16 code unit (NaN yields false).
 * @returns {boolean}
 */
function isWhitespace(c) {
  if (c <= CH_SPACE) {
    // space, or the contiguous run tab / LF / vertical tab / form feed / CR
    return c === CH_SPACE || (c >= CH_TAB && c <= CH_CR);
  }
  return c >= 0xa0 && isUnicodeSpace(c);
}

/**
 * Check if a character is a lookahead character for ID/literal matching.
 * LOOKAHEAD = [=~}\s\/.)\]|]
 *
 * NaN (past end of string) also counts as a lookahead.
 */
function isLookahead(c) {
  return (
    c === CH_EQ ||
    c === CH_TILDE ||
    c === CH_RBRACE ||
    isWhitespace(c) ||
    c === CH_SLASH ||
    c === CH_DOT ||
    c === CH_RPAREN ||
    c === CH_RBRACKET ||
    c === CH_PIPE ||
    c !== c // NaN (past end of string)
  );
}

/**
 * LITERAL_LOOKAHEAD = [~}\s)\]]
 *
 * NaN (past end of string) also counts as a lookahead.
 */
function isLiteralLookahead(c) {
  return (
    c === CH_TILDE ||
    c === CH_RBRACE ||
    isWhitespace(c) ||
    c === CH_RPAREN ||
    c === CH_RBRACKET ||
    c !== c // NaN
  );
}

/**
 * Strip brackets from an ID token: [foo] → foo
 * Tokens that are not wrapped in `[` … `]` are returned unchanged.
 *
 * @param {string} token
 * @returns {string}
 */
function idFromToken(token) {
  if (token.charCodeAt(0) === CH_LBRACKET && token.charCodeAt(token.length - 1) === CH_RBRACKET) {
    return token.substring(1, token.length - 1);
  }
  return token;
}

/**
 * Remove the comment delimiters from a raw comment mustache:
 * a leading `{{`, optional `~`, `!` and up to two `-`, and a trailing
 * up-to-two `-`, optional `~` and `}}`.
 *
 * @param {string} comment - Raw text including the delimiters.
 * @returns {string} The comment body.
 */
function stripComment(comment) {
  return comment.replace(/^\{\{~?!-?-?/, '').replace(/-?-?~?\}\}$/, '');
}
= 1; + let col = 0; + const len = input.length; + const srcName = options?.srcName ?? undefined; + + // Syntax options + let squareSyntax; + if (typeof options?.syntax?.square === 'function') { + squareSyntax = options.syntax.square; + } else if (options?.syntax?.square === 'node') { + squareSyntax = arrayLiteralNode; + } else { + squareSyntax = 'string'; + } + + let hashSyntax; + if (typeof options?.syntax?.hash === 'function') { + hashSyntax = options.syntax.hash; + } else { + hashSyntax = hashLiteralNode; + } + + // yy-like context for helper callbacks + const yy = { preparePath, id: idFromToken, locInfo: makeLoc }; + + // === Position tracking === + + function advanceTo(target) { + while (pos < target) { + const nl = input.indexOf('\n', pos); + if (nl === -1 || nl >= target) { + col += target - pos; + pos = target; + return; + } + // Count the newline + line++; + col = 0; + pos = nl + 1; + } + } + + function cc(offset) { + return input.charCodeAt(pos + (offset || 0)); + } + + function startsWith(str, offset) { + return input.startsWith(str, pos + (offset || 0)); + } + + function makeLoc(sl, sc, el, ec) { + return { + source: srcName, + start: { line: sl, column: sc }, + end: { line: el || line, column: ec !== undefined ? 
ec : col }, + }; + } + + function savePos() { + return { line, col }; + } + + function locFrom(start) { + return makeLoc(start.line, start.col, line, col); + } + + function error(msg) { + throw new Exception('Parse error on line ' + line + ':\n' + input.slice(pos, pos + 20) + '\n' + msg, { + loc: makeLoc(line, col), + }); + } + + // === Scanning primitives === + + function skipWs() { + while (pos < len && isWhitespace(cc())) { + if (cc() === CH_NL) { + line++; + col = 0; + pos++; + } else if (cc() === CH_CR) { + line++; + col = 0; + pos++; + if (pos < len && cc() === CH_NL) pos++; // \r\n + } else { + col++; + pos++; + } + } + } + + function scanId() { + const start = pos; + while (pos < len && isIdChar(cc())) { + col++; + pos++; + } + if (pos === start) return null; + return input.substring(start, pos); + } + + function scanEscapedLiteral() { + // We're at '[', scan to matching ']' with backslash escaping + if (cc() !== CH_LBRACKET) return null; + const start = pos; + col++; + pos++; // skip [ + while (pos < len) { + const c = cc(); + if (c === CH_BACKSLASH && pos + 1 < len) { + col += 2; + pos += 2; // skip escaped char + } else if (c === CH_RBRACKET) { + col++; + pos++; // skip ] + const raw = input.substring(start, pos); + return raw.replace(/\\([\\\]])/g, '$1'); + } else if (c === CH_NL) { + line++; + col = 0; + pos++; + } else { + col++; + pos++; + } + } + error('Unterminated escaped literal'); + } + + function scanString() { + const quote = cc(); + if (quote !== CH_DQUOTE && quote !== CH_SQUOTE) return null; + const startPos = pos; + const startP = savePos(); + col++; + pos++; // skip opening quote + let result = ''; + let segStart = pos; + while (pos < len) { + const c = cc(); + if (c === CH_BACKSLASH && pos + 1 < len && cc(1) === quote) { + result += input.substring(segStart, pos); + col += 2; + pos += 2; + result += String.fromCharCode(quote); + segStart = pos; + } else if (c === quote) { + result += input.substring(segStart, pos); + col++; + pos++; // 
skip closing quote + return { value: result, original: result, loc: locFrom(startP) }; + } else if (c === CH_NL) { + line++; + col = 0; + pos++; + } else { + col++; + pos++; + } + } + error('Unterminated string'); + } + + function scanNumber() { + const start = pos; + if (cc() === CH_DASH) { + col++; + pos++; + } + if (pos >= len || cc() < CH_0 || cc() > CH_9) { + // Not a number, restore + advanceTo(start); // no-op if no dash + pos = start; + col = col - (pos - start); // crude restore + return null; + } + // Actually, let me just save/restore properly + const savedLine = line; + const savedCol = col; + + // Reset to start for proper scanning + pos = start; + line = savedLine; + col = savedCol - (pos === start ? 0 : 1); + + if (cc() === CH_DASH) { + col++; + pos++; + } + while (pos < len && cc() >= CH_0 && cc() <= CH_9) { + col++; + pos++; + } + if (pos < len && cc() === CH_DOT) { + col++; + pos++; + while (pos < len && cc() >= CH_0 && cc() <= CH_9) { + col++; + pos++; + } + } + // Check literal lookahead + if (pos < len && !isLiteralLookahead(cc())) { + // Not a valid number, restore + pos = start; + line = savedLine; + col = savedCol - (pos - start); + return null; + } + return input.substring(start, pos); + } + + // === Content scanning === + + function scanContent() { + if (pos >= len) return null; + const startP = savePos(); + const start = pos; + let result = ''; + let segStart = pos; + + while (pos < len) { + const idx = input.indexOf('{{', pos); + if (idx === -1) { + // Rest is content + advanceTo(len); + result += input.substring(segStart, len); + if (result.length === 0) return null; + return { + type: 'ContentStatement', + original: result, + value: result, + loc: locFrom(startP), + }; + } + + // Check for escaped mustache — only if the backslash is within our scan range + if (idx > pos && input.charCodeAt(idx - 1) === CH_BACKSLASH) { + if (idx > pos + 1 && input.charCodeAt(idx - 2) === CH_BACKSLASH) { + // \\{{ — the \\ is a literal backslash, {{ is a 
real mustache + // Content includes everything up to \\{{ with one backslash stripped + result += input.substring(segStart, idx - 1); // strip one backslash + advanceTo(idx); // advance to the real {{ (not past it) + if (result.length === 0) return null; + return { + type: 'ContentStatement', + original: result, + value: result, + loc: locFrom(startP), + }; + } + // \{{ — escaped mustache. Jison handles this by: + // 1. Emitting content up to the \ (stripping it) as CONTENT + // 2. Entering emu state which scans to next {{/\{{/\\{{/EOF + // 3. Emitting that chunk as another CONTENT + // + // We match this by: emit what we have so far (up to the \, stripped), + // then advance past \{{ and let the emu scan produce the next content. + + // First: emit content accumulated so far (before the backslash) + advanceTo(idx - 1); + result += input.substring(segStart, idx - 1); + if (result.length > 0) { + return { + type: 'ContentStatement', + original: result, + value: result, + loc: locFrom(startP), + }; + } + + // If no content before the \, advance past the \{{ and scan emu content + advanceTo(idx + 2); // past \{{ + const emuStartP = savePos(); + const emuStart = pos; + const nextMu = findNextMustacheOrEnd(pos); + advanceTo(nextMu); + const emuContent = '{{' + input.substring(emuStart, nextMu); + return { + type: 'ContentStatement', + original: emuContent, + value: emuContent, + loc: makeLoc(startP.line, startP.col, line, col), + }; + } + + // Normal {{ — stop here + advanceTo(idx); + result += input.substring(segStart, idx); + if (result.length === 0) return null; + return { + type: 'ContentStatement', + original: result, + value: result, + loc: locFrom(startP), + }; + } + + result += input.substring(segStart, len); + advanceTo(len); + if (result.length === 0) return null; + return { + type: 'ContentStatement', + original: result, + value: result, + loc: locFrom(startP), + }; + } + + function findNextMustacheOrEnd(from) { + // Emu state: scan for next {{ (escaped or 
// === Mustache classification ===
// After seeing '{{', classify what kind of statement this is.

/**
 * Consume the mustache opener at the cursor and classify it.
 *
 * The cursor must be on `{{`. The returned descriptor always has `kind` and
 * `start` (the position saved before the opener). Depending on `kind` it also
 * carries `raw` (the consumed opener text), `leftStrip`, `strip`,
 * `isDecorator`, `unescaped`, or — for comments, which are consumed whole —
 * `value` and `loc`.
 *
 * Kinds produced: `raw`, `inverse`, `inverseChain`, `comment`, `partial`,
 * `partialBlock`, `block`, `close`, `openInverse`, `unescaped`, `mustache`.
 *
 * Whitespace between `{{` / `{{~` and the keyword is accepted only for
 * `else`. For every other opener the sigil must follow immediately, so the
 * sigil is read AFTER the whitespace skip has been undone. Reading it before
 * the restore made `{{ !x}}` or `{{ #x}}` dispatch on a character the cursor
 * was not on, and the following `col++; pos++` then consumed a space instead
 * of the sigil.
 *
 * @returns {object} opener descriptor as described above
 */
function consumeOpen() {
  const openStart = savePos();
  const startPos = pos;

  // `{{{{` opens a raw block.
  if (startsWith('{{{{')) {
    advanceTo(pos + 4);
    if (cc() === CH_SLASH) {
      // `{{{{/` is consumed by raw-block parsing, never at statement level.
      error('Unexpected raw block close');
    }
    return { kind: 'raw', start: openStart, raw: input.substring(startPos, pos) };
  }

  advanceTo(pos + 2); // skip {{

  // Optional `~` (left whitespace strip).
  let leftStrip = false;
  if (cc() === CH_TILDE) {
    leftStrip = true;
    col++;
    pos++;
  }

  // Tentatively skip whitespace to look for the `else` keyword.
  const afterStripPos = pos;
  const afterStripLine = line;
  const afterStripCol = col;
  skipWs();

  if (startsWith('else')) {
    const afterElse = pos + 4;
    const charAfterElse = input.charCodeAt(afterElse);

    // Only a keyword when followed by whitespace, `~` or `}`; otherwise it is
    // an identifier that merely starts with "else".
    if (
      isWhitespace(charAfterElse) ||
      charAfterElse === CH_TILDE ||
      charAfterElse === CH_RBRACE
    ) {
      advanceTo(afterElse);
      // Remember the cursor right after the keyword so we can come back here
      // if the lookahead for `~?}}` below does not pan out.
      const elseEndLine = line;
      const elseEndCol = col;
      skipWs();

      let rightStrip = false;
      if (cc() === CH_TILDE) {
        rightStrip = true;
        col++;
        pos++;
      }
      if (cc() === CH_RBRACE && cc(1) === CH_RBRACE) {
        // Standalone inverse: {{else}}
        advanceTo(pos + 2);
        const raw = input.substring(startPos, pos);
        return {
          kind: 'inverse',
          start: openStart,
          strip: { open: leftStrip, close: rightStrip },
          raw,
        };
      }

      // `{{else something}}` — an inverse chain. Go back to just after the
      // keyword (the lookahead may have stepped over whitespace and a `~`),
      // then skip the whitespace that separates it from the expression.
      pos = afterElse;
      line = elseEndLine;
      col = elseEndCol;
      skipWs();
      const raw = input.substring(startPos, pos);
      return {
        kind: 'inverseChain',
        start: openStart,
        leftStrip,
        raw,
      };
    }
  }

  // Not an `else` opener: undo the tentative whitespace skip.
  pos = afterStripPos;
  line = afterStripLine;
  col = afterStripCol;

  // Classify on the character the cursor is actually on.
  const c = cc();

  switch (c) {
    case CH_BANG: {
      // Comment: {{! or {{!--
      //
      // Two candidate matches are computed and the longer one wins:
      //   short: from the opener up to the first `}}`
      //   long:  only when the body starts with `--`; up to the first
      //          `--}}` / `--~}}` after that initial `--`
      // The long form wins when the comment body itself contains `}}`,
      // e.g. `{{!-- a }} b --}}`.
      col++;
      pos++;
      const afterBang = pos;

      const shortEnd = input.indexOf('}}', afterBang);
      if (shortEnd === -1) error('Unterminated comment');
      let shortRStrip = false;
      if (shortEnd > 0 && input.charCodeAt(shortEnd - 1) === CH_TILDE) {
        shortRStrip = true;
      }
      const shortMatchEnd = shortEnd + 2; // past }}

      const startsWithDashDash =
        input.charCodeAt(afterBang) === CH_DASH && input.charCodeAt(afterBang + 1) === CH_DASH;

      if (startsWithDashDash) {
        let longMatchEnd = -1;
        let longRStrip = false;
        let searchFrom = afterBang + 2; // past the opening --

        while (searchFrom < len) {
          const dashIdx = input.indexOf('--', searchFrom);
          if (dashIdx === -1) break;
          let afterDash = dashIdx + 2;
          let thisRStrip = false;
          if (afterDash < len && input.charCodeAt(afterDash) === CH_TILDE) {
            thisRStrip = true;
            afterDash++;
          }
          if (
            afterDash + 1 < len &&
            input.charCodeAt(afterDash) === CH_RBRACE &&
            input.charCodeAt(afterDash + 1) === CH_RBRACE
          ) {
            longMatchEnd = afterDash + 2;
            longRStrip = thisRStrip;
            break;
          }
          // Step by one so overlapping dashes (`---}}`) are still found.
          searchFrom = dashIdx + 1;
        }

        if (longMatchEnd > shortMatchEnd) {
          const rawText = input.substring(startPos, longMatchEnd);
          advanceTo(longMatchEnd);
          return {
            kind: 'comment',
            start: openStart,
            value: stripComment(rawText),
            strip: { open: leftStrip, close: longRStrip },
            loc: locFrom(openStart),
          };
        }
      }

      // Short comment wins (or there was no long match).
      const rawText = input.substring(startPos, shortMatchEnd);
      advanceTo(shortMatchEnd);
      return {
        kind: 'comment',
        start: openStart,
        value: stripComment(rawText),
        strip: { open: leftStrip, close: shortRStrip },
        loc: locFrom(openStart),
      };
    }

    case CH_GT: {
      // Partial: {{>
      col++;
      pos++;
      return { kind: 'partial', start: openStart, leftStrip, raw: input.substring(startPos, pos) };
    }

    case CH_HASH: {
      col++;
      pos++;
      // {{#> (partial block)
      if (cc() === CH_GT) {
        col++;
        pos++;
        return {
          kind: 'partialBlock',
          start: openStart,
          leftStrip,
          raw: input.substring(startPos, pos),
        };
      }
      // {{#* (decorator block)
      let isDecorator = false;
      if (cc() === CH_STAR) {
        isDecorator = true;
        col++;
        pos++;
      }
      return {
        kind: 'block',
        start: openStart,
        leftStrip,
        isDecorator,
        raw: input.substring(startPos, pos),
      };
    }

    case CH_SLASH: {
      // Close block: {{/
      col++;
      pos++;
      return { kind: 'close', start: openStart, leftStrip, raw: input.substring(startPos, pos) };
    }

    case CH_CARET: {
      // ^ — standalone inverse {{^}} / {{^~}} or open inverse {{^foo}}
      col++;
      pos++;
      skipWs();
      if (cc() === CH_TILDE) {
        const savedP = pos;
        const savedL = line;
        const savedC = col;
        col++;
        pos++;
        if (cc() === CH_RBRACE && cc(1) === CH_RBRACE) {
          advanceTo(pos + 2);
          return {
            kind: 'inverse',
            start: openStart,
            strip: { open: leftStrip, close: true },
            raw: input.substring(startPos, pos),
          };
        }
        // `~` not followed by `}}`: put it back.
        pos = savedP;
        line = savedL;
        col = savedC;
      }
      if (cc() === CH_RBRACE && cc(1) === CH_RBRACE) {
        advanceTo(pos + 2);
        return {
          kind: 'inverse',
          start: openStart,
          strip: { open: leftStrip, close: false },
          raw: input.substring(startPos, pos),
        };
      }
      // It's an open inverse block.
      return {
        kind: 'openInverse',
        start: openStart,
        leftStrip,
        raw: input.substring(startPos, pos),
      };
    }

    case CH_LBRACE: {
      // Triple stache {{{ (unescaped)
      col++;
      pos++;
      return {
        kind: 'unescaped',
        start: openStart,
        leftStrip,
        raw: input.substring(startPos, pos),
      };
    }

    case CH_AMP: {
      // Unescaped {{&
      col++;
      pos++;
      return {
        kind: 'mustache',
        start: openStart,
        leftStrip,
        unescaped: true,
        raw: input.substring(startPos, pos),
      };
    }

    case CH_STAR: {
      // Decorator {{*
      col++;
      pos++;
      return {
        kind: 'mustache',
        start: openStart,
        leftStrip,
        isDecorator: true,
        raw: input.substring(startPos, pos),
      };
    }

    default: {
      // Regular mustache {{ — nothing beyond `{{` / `{{~` is consumed.
      return {
        kind: 'mustache',
        start: openStart,
        leftStrip,
        raw: input.substring(startPos, pos),
      };
    }
  }
}

/**
 * Consume optional whitespace followed by `}}` or `~}}`.
 *
 * @returns {boolean} true when the closer carried a `~` (right strip)
 */
function consumeClose() {
  skipWs();
  let rightStrip = false;
  if (cc() === CH_TILDE) {
    rightStrip = true;
    col++;
    pos++;
  }
  if (cc() !== CH_RBRACE || cc(1) !== CH_RBRACE) {
    error("Expected '}}'");
  }
  advanceTo(pos + 2);
  return rightStrip;
}

/**
 * Consume optional whitespace followed by `}}}` or `~}}}`.
 *
 * @returns {boolean} true when the closer carried a `~` (right strip)
 */
function consumeUnescapedClose() {
  skipWs();
  let rightStrip = false;
  if (cc() === CH_TILDE) {
    rightStrip = true;
    col++;
    pos++;
  }
  if (cc() !== CH_RBRACE || cc(1) !== CH_RBRACE || cc(2) !== CH_RBRACE) {
    error("Expected '}}}'");
  }
  advanceTo(pos + 3);
  return rightStrip;
}

// === Expression parsing ===

/**
 * Parse one expression after skipping leading whitespace: a sub-expression
 * when the cursor is on `(`, an array literal when it is on `[` and
 * `squareSyntax` is not the string 'string', otherwise a literal or path.
 *
 * @returns {object} the parsed expression node
 */
function parseExpr() {
  skipWs();
  const c = cc();

  if (c === CH_LPAREN) return parseSexprOrPath();

  if (c === CH_LBRACKET && squareSyntax !== 'string') return parseArrayLiteralOrPath();

  return parseHelperName();
}

/**
 * Parse a sub-expression and, when a `.` or `/` separator follows (optionally
 * after whitespace), continue it as the head of a path.
 *
 * Trailing whitespace is NOT consumed when no separator follows: the caller
 * owns it, because it affects the loc of the containing node.
 *
 * @returns {object} the sub-expression node, or a path whose head is it
 */
function parseSexprOrPath() {
  const startP = savePos(); // position BEFORE the sub-expression
  const sexpr = parseSexpr();

  const savedPos = pos;
  const savedLine = line;
  const savedCol = col;
  skipWs();
  if (cc() === CH_DOT || cc() === CH_SLASH) {
    return parsePath(false, sexpr, startP);
  }

  // No separator: hand the whitespace back to the caller.
  pos = savedPos;
  line = savedLine;
  col = savedCol;
  return sexpr;
}
/**
 * Parse an array literal and, when a `.` or `/` separator follows (optionally
 * after whitespace), continue it as the head of a path. When no separator
 * follows, the cursor is put back directly after the literal so the caller
 * keeps ownership of trailing whitespace.
 *
 * @returns {object} the array-literal result, or a path whose head is it
 */
function parseArrayLiteralOrPath() {
  const headStart = savePos(); // position BEFORE the array literal
  const literal = parseArrayLiteral();

  const resumePos = pos;
  const resumeLine = line;
  const resumeCol = col;
  skipWs();
  if (cc() === CH_DOT || cc() === CH_SLASH) {
    return parsePath(false, literal, headStart);
  }

  pos = resumePos;
  line = resumeLine;
  col = resumeCol;
  return literal;
}

/**
 * Parse a literal or a path at the cursor (after skipping whitespace).
 *
 * Tried in order: string literal, number literal, `true` / `false`,
 * `undefined`, `null`, data path (`@...`), plain path. Keyword literals are
 * only recognised when the character after the keyword satisfies
 * `isLiteralLookahead`.
 *
 * @returns {object} the literal or `PathExpression` node
 */
function parseHelperName() {
  skipWs();
  const first = cc();
  const startP = savePos();

  // True when `word` is at the cursor and terminated like a literal.
  const keywordAhead = (word) =>
    startsWith(word) && isLiteralLookahead(input.charCodeAt(pos + word.length));

  // String literal
  if (first === CH_DQUOTE || first === CH_SQUOTE) {
    const str = scanString();
    return { type: 'StringLiteral', value: str.value, original: str.value, loc: str.loc };
  }

  // Number literal
  if (first === CH_DASH || (first >= CH_0 && first <= CH_9)) {
    const markPos = pos;
    const markLine = line;
    const markCol = col;
    const text = scanNumber();
    if (text !== null && (pos >= len || isLiteralLookahead(cc()))) {
      const num = Number(text);
      return {
        type: 'NumberLiteral',
        value: num,
        original: num,
        loc: locFrom(startP),
      };
    }
    // Not a number after all — may be a path or ID that starts with a dash.
    pos = markPos;
    line = markLine;
    col = markCol;
  }

  // Boolean
  if (keywordAhead('true')) {
    advanceTo(pos + 4);
    return { type: 'BooleanLiteral', value: true, original: true, loc: locFrom(startP) };
  }
  if (keywordAhead('false')) {
    advanceTo(pos + 5);
    return { type: 'BooleanLiteral', value: false, original: false, loc: locFrom(startP) };
  }

  // Undefined
  if (keywordAhead('undefined')) {
    advanceTo(pos + 9);
    return {
      type: 'UndefinedLiteral',
      original: undefined,
      value: undefined,
      loc: locFrom(startP),
    };
  }

  // Null
  if (keywordAhead('null')) {
    advanceTo(pos + 4);
    return { type: 'NullLiteral', original: null, value: null, loc: locFrom(startP) };
  }

  // Data path (@...)
  if (first === CH_AT) {
    col++;
    pos++;
    return parseDataName(startP);
  }

  // Path (starting with ID, .., ., or escaped [literal])
  return parsePath(false, false);
}

/**
 * Parse the segments of a data path; the `@` has already been consumed.
 *
 * A digit directly after `@` is rejected: per the original grammar notes, the
 * Jison lexer tokenises digits as NUMBER rather than ID there, so `@0`, `@1`,
 * … are parse errors.
 *
 * @param {object} startP position saved before the `@`
 * @returns {object} a `PathExpression` with `data: true`
 */
function parseDataName(startP) {
  const next = cc();
  if (next >= CH_0 && next <= CH_9) {
    error('Expected path identifier after @');
  }
  return preparePath(true, false, parsePathSegments(), locFrom(startP));
}

/**
 * Parse a path at the cursor.
 *
 * @param {boolean} data forwarded to `preparePath` for plain paths
 * @param {object|false} exprHead an already-parsed head expression; when
 *   given, a separator is required next and the path is built on that head
 * @param {object} [exprHeadStartP] position saved before `exprHead`; used as
 *   the start of the path's loc when provided
 * @returns {object} a `PathExpression` node
 */
function parsePath(data, exprHead, exprHeadStartP) {
  const startP = exprHeadStartP || savePos();

  if (!exprHead) {
    // pathSegments: ID (sep ID)*
    const plainSegments = parsePathSegments();
    return preparePath(data, false, plainSegments, locFrom(startP));
  }

  // exprHead sep pathSegments
  const sep = scanSep();
  if (!sep) error('Expected separator after sub-expression in path');
  const tailSegments = parsePathSegments();
  return preparePath(false, exprHead, tailSegments, locFrom(startP));
}

/**
 * Parse `ID (sep ID)*` into a list of segment records.
 *
 * Each record is `{ part, original }`, plus `separator` for every segment
 * after the first. A trailing separator with no identifier after it
 * (e.g. "foo." or "foo/") is not consumed; the cursor is put back in front of
 * it and the error is left to later stages.
 *
 * @returns {Array<object>} the parsed segments (at least one)
 */
function parsePathSegments() {
  const head = scanIdOrEscaped();
  if (head === null) error('Expected path identifier');
  const segments = [{ part: idFromToken(head), original: head }];

  while (pos < len) {
    const markPos = pos;
    const markLine = line;
    const markCol = col;

    const separator = scanSep();
    if (!separator) break;

    const token = scanIdOrEscaped();
    if (token === null) {
      pos = markPos;
      line = markLine;
      col = markCol;
      break;
    }
    segments.push({ part: idFromToken(token), original: token, separator });
  }

  return segments;
}
/**
 * Scan one path-segment token at the cursor: an escaped `[literal]`, the
 * special tokens `..` and `.` (the latter only when followed by a lookahead
 * character), or a plain identifier via `scanId`.
 *
 * @returns {string|null} the token text, or whatever the delegated scanner
 *   returns when nothing matches
 */
function scanIdOrEscaped() {
  const here = cc();
  if (here === CH_LBRACKET) {
    return scanEscapedLiteral();
  }
  if (here === CH_DOT) {
    // '..' and '.' are valid ID tokens (per the Jison lexer rules)
    if (cc(1) === CH_DOT) {
      col += 2;
      pos += 2;
      return '..';
    }
    if (isLookahead(cc(1))) {
      col++;
      pos++;
      return '.';
    }
  }
  return scanId();
}

/**
 * Scan a path separator at the cursor.
 *
 * @returns {string|null} `'.#'`, `'.'` or `'/'` when one was consumed,
 *   otherwise `null` with the cursor untouched
 */
function scanSep() {
  const here = cc();
  if (here === CH_DOT && cc(1) === CH_HASH) {
    col += 2;
    pos += 2;
    return '.#';
  }
  if (here !== CH_DOT && here !== CH_SLASH) {
    return null;
  }
  const sep = input.charAt(pos);
  col++;
  pos++;
  return sep;
}

/**
 * Build a `PathExpression` node from scanned segments.
 *
 * Leading `..` segments increase `depth`; `.`, `..` and `this` are only legal
 * before any real segment unless they came from an escaped literal (i.e. the
 * segment's `original` differs from its `part`). A segment introduced by the
 * `.#` separator is stored with a `#` prefix.
 *
 * @param {boolean} data truthy for `@` data paths
 * @param {object|false} sexpr head expression for `(expr).tail` paths
 * @param {Array<object>} parts records of `{ part, original, separator? }`
 * @param {object} loc location to attach to the node (and to the error)
 * @returns {object} the `PathExpression` node
 * @throws {Exception} when `.`, `..` or `this` appears after a real segment
 */
function preparePath(data, sexpr, parts, loc) {
  let original = '';
  if (data) {
    original = '@';
  } else if (sexpr) {
    original = sexpr.original + '.';
  }

  const tail = [];
  let depth = 0;

  for (const segment of parts) {
    const part = segment.part;
    const separator = segment.separator;
    const cameFromLiteral = segment.original !== part;
    const prefix = separator === '.#' ? '#' : '';

    original += (separator || '') + part;

    const isScopeToken = part === '..' || part === '.' || part === 'this';
    if (cameFromLiteral || !isScopeToken) {
      tail.push(`${prefix}${part}`);
      continue;
    }

    if (tail.length > 0) {
      throw new Exception('Invalid path: ' + original, { loc });
    }
    if (part === '..') {
      depth++;
    }
  }

  const head = sexpr || tail.shift();
  const parts_ = head ? [head, ...tail] : tail;

  return {
    type: 'PathExpression',
    this: original.startsWith('this.'),
    data: Boolean(data),
    depth,
    head,
    tail,
    parts: parts_,
    original,
    loc,
  };
}

// === Hash parsing ===

/**
 * Pure lookahead: is the cursor on a hash pair, i.e. an identifier or escaped
 * `[literal]` followed (after optional whitespace) by `=`? Does not move the
 * cursor.
 *
 * @returns {boolean}
 */
function isAtHash() {
  if (!isIdChar(cc()) && cc() !== CH_LBRACKET) return false;

  let p = pos;
  if (input.charCodeAt(p) !== CH_LBRACKET) {
    // Plain identifier
    while (p < len && isIdChar(input.charCodeAt(p))) p++;
  } else {
    // Escaped literal — walk to the closing ], stepping over \-escaped chars
    p++;
    for (; p < len && input.charCodeAt(p) !== CH_RBRACKET; p++) {
      if (input.charCodeAt(p) === CH_BACKSLASH) p++;
    }
    p++; // skip ]
  }

  while (p < len && isWhitespace(input.charCodeAt(p))) p++;
  return p < len && input.charCodeAt(p) === CH_EQ;
}

/**
 * Parse consecutive `key=value` pairs into a `Hash` node.
 *
 * The node's loc ends right after the last pair; whitespace skipped after it
 * is not included.
 *
 * @returns {object|undefined} the `Hash` node, or `undefined` when the cursor
 *   is not on a hash pair
 */
function parseHash() {
  const hashStart = savePos();
  const pairs = [];
  let hashEnd;

  while (pos < len && isAtHash()) {
    pairs.push(parseHashPair());
    hashEnd = savePos(); // end is captured BEFORE trailing whitespace
    skipWs();
  }

  if (pairs.length === 0) return undefined;
  return {
    type: 'Hash',
    pairs,
    loc: makeLoc(hashStart.line, hashStart.col, hashEnd.line, hashEnd.col),
  };
}

/**
 * Parse a single `key=value` pair; whitespace around `=` is allowed.
 *
 * @returns {object} a `HashPair` node
 */
function parseHashPair() {
  skipWs();
  const pairStart = savePos();

  const keyToken = scanIdOrEscaped();
  if (keyToken === null) error('Expected hash key');

  skipWs();
  if (cc() !== CH_EQ) error("Expected '=' in hash");
  col++;
  pos++; // skip =
  skipWs();

  const value = parseExpr();
  return { type: 'HashPair', key: idFromToken(keyToken), value, loc: locFrom(pairStart) };
}

// === Sub-expression parsing ===

/**
 * Parse a parenthesised sub-expression at the cursor.
 *
 * `(key=val ...)` with nothing but hash pairs is handed to `hashSyntax`;
 * otherwise the result is a `SubExpression` with a path, positional params
 * and an optional trailing hash.
 *
 * @returns {object} the `hashSyntax` result or a `SubExpression` node
 */
function parseSexpr() {
  const sexprStart = savePos();
  if (cc() !== CH_LPAREN) error("Expected '('");
  col++;
  pos++; // skip (
  skipWs();

  // Hash-only sexpr: (key=val)
  if (isAtHash()) {
    const onlyHash = parseHash();
    skipWs();
    if (cc() !== CH_RPAREN) error("Expected ')'");
    col++;
    pos++;
    const loc = locFrom(sexprStart);
    return hashSyntax(onlyHash, loc, { yy, syntax: 'expr' });
  }

  const path = parseExpr();
  const params = [];
  let hash = undefined;

  skipWs();
  while (cc() !== CH_RPAREN && pos < len) {
    if (isAtHash()) {
      // A hash is always last; nothing positional may follow it.
      hash = parseHash();
      break;
    }
    params.push(parseExpr());
    skipWs();
  }

  skipWs();
  if (cc() !== CH_RPAREN) error("Expected ')'");
  col++;
  pos++;

  return { type: 'SubExpression', path, params, hash, loc: locFrom(sexprStart) };
}
// === Array literal ===

/**
 * Parse `[item item ...]` at the cursor and hand the items to `squareSyntax`.
 *
 * @returns {*} whatever `squareSyntax(items, loc, { yy, syntax: 'expr' })`
 *   returns
 */
function parseArrayLiteral() {
  const startP = savePos();
  if (cc() !== CH_LBRACKET) error("Expected '['");
  col++;
  pos++; // skip [
  const items = [];
  skipWs();
  while (cc() !== CH_RBRACKET && pos < len) {
    items.push(parseExpr());
    skipWs();
  }
  if (cc() !== CH_RBRACKET) error("Expected ']'");
  col++;
  pos++;
  const loc = locFrom(startP);
  return squareSyntax(items, loc, { yy, syntax: 'expr' });
}

// === Block params ===

/**
 * Parse `as |a b c|` at the cursor (after skipping whitespace).
 *
 * When the input is not `as`, whitespace, `|`, only that leading whitespace
 * has been consumed and `null` is returned.
 *
 * @returns {Array|null} the block-param names, or `null` when no block params
 *   start here
 */
function parseBlockParams() {
  skipWs();
  if (!startsWith('as')) return null;
  const afterAs = pos + 2;
  if (afterAs >= len || !isWhitespace(input.charCodeAt(afterAs))) return null;

  // Peek past 'as' + whitespace without moving the cursor.
  let p = afterAs;
  while (p < len && isWhitespace(input.charCodeAt(p))) p++;
  if (p >= len || input.charCodeAt(p) !== CH_PIPE) return null;

  advanceTo(p + 1); // past 'as' + ws + |
  const ids = [];
  skipWs();
  while (cc() !== CH_PIPE && pos < len) {
    const id = scanId();
    if (id === null) error('Expected block param identifier');
    ids.push(idFromToken(id));
    skipWs();
  }
  if (cc() !== CH_PIPE) error("Expected '|' to close block params");
  col++;
  pos++;
  return ids;
}

// === Statement parsers ===

/**
 * Parse statements until end of input or until an opener matching one of
 * `terminators` is reached. The terminating opener is left unconsumed.
 *
 * @param {Array<string>} [terminators] any of 'close' / 'inverse'; omit to
 *   parse to end of input
 * @returns {*} the result of `prepareProgram` on the collected statements
 */
function parseProgram(terminators) {
  const stmts = [];
  while (pos < len) {
    if (startsWith('{{') && isTerminator(terminators)) break;

    const content = scanContent();
    if (content) {
      stmts.push(content);
      continue;
    }

    if (pos >= len) break;

    // No content was produced, so the cursor is on a `{{`.
    if (isTerminator(terminators)) break;
    const stmt = parseOpenStatement();
    if (stmt) stmts.push(stmt);
  }

  return prepareProgram(stmts);
}

/**
 * Pure lookahead: does the `{{` at the cursor open one of the given
 * terminator kinds? Never moves the cursor.
 *
 *  - 'close'   matches `{{/` and `{{~/`
 *  - 'inverse' matches `{{^` / `{{~^`, and the `else` keyword, which may be
 *    preceded by whitespace and must be followed by whitespace, `~` or `}`
 *
 * The `else` test applies the same follow-character rule as `consumeOpen`, so
 * an identifier that merely starts with "else" (`{{elsewhere}}`) is an
 * ordinary mustache here as well. Without that rule the enclosing program
 * stopped at such a mustache and block parsing then failed at the close.
 *
 * @param {Array<string>} [terminators] kinds to test for
 * @returns {boolean}
 */
function isTerminator(terminators) {
  if (!terminators) return false;
  if (!startsWith('{{')) return false;

  let p = pos + 2;
  if (p < len && input.charCodeAt(p) === CH_TILDE) p++; // optional left strip

  // Sigils must follow the opener directly ...
  const sigil = input.charCodeAt(p);

  // ... whereas `else` may be preceded by whitespace.
  let pw = p;
  while (pw < len && isWhitespace(input.charCodeAt(pw))) pw++;
  let isElse = false;
  if (input.startsWith('else', pw)) {
    const after = input.charCodeAt(pw + 4);
    isElse = isWhitespace(after) || after === CH_TILDE || after === CH_RBRACE;
  }

  for (const t of terminators) {
    switch (t) {
      case 'close':
        if (sigil === CH_SLASH) return true;
        break;
      case 'inverse':
        // {{^}} or {{^foo
        if (sigil === CH_CARET) return true;
        // {{else}} or {{else foo
        if (isElse) return true;
        break;
    }
  }

  return false;
}

/**
 * Consume the opener at the cursor and parse the statement it introduces.
 *
 * Openers that are only valid inside a block (`inverse`, `inverseChain`,
 * `close`) are reported through `error`.
 *
 * @returns {object|undefined} the statement node
 */
function parseOpenStatement() {
  const open = consumeOpen();

  switch (open.kind) {
    case 'comment':
      // Comments are consumed whole by consumeOpen.
      return {
        type: 'CommentStatement',
        value: open.value,
        strip: open.strip,
        loc: open.loc,
      };

    case 'mustache':
      return parseMustache(open);

    case 'unescaped':
      return parseUnescapedMustache(open);

    case 'block':
      return parseBlock(open);

    case 'openInverse':
      return parseInverseBlock(open);

    case 'partial':
      return parsePartial(open);

    case 'partialBlock':
      return parsePartialBlock(open);

    case 'raw':
      return parseRawBlock(open);

    case 'inverse':
      // Standalone inverse at statement level is an error.
      error('Unexpected inverse');
      break;

    case 'close':
      error('Unexpected close block');
      break;

    case 'inverseChain':
      error('Unexpected inverse chain');
      break;

    default:
      error('Unexpected token: ' + open.kind);
  }
}

/**
 * Parse the remainder of a `{{...}}` mustache (or `{{*...}}` decorator) whose
 * opener has already been consumed.
 *
 * A hash-only mustache (`{{key=val}}`) gets the `hashSyntax` result as its
 * path, with no params and no hash.
 *
 * @param {object} open opener descriptor from `consumeOpen`
 * @returns {object} a `MustacheStatement` or `Decorator` node
 */
function parseMustache(open) {
  skipWs();
  const type = open.isDecorator ? 'Decorator' : 'MustacheStatement';

  // Hash-only mustache: {{key=val}}
  if (isAtHash()) {
    const onlyHash = parseHash();
    const rightStrip = consumeClose();
    const loc = locFrom(open.start);
    const strip = { open: open.leftStrip || false, close: rightStrip };
    const wrappedPath = hashSyntax(onlyHash, loc, { yy, syntax: 'expr' });
    return {
      type,
      path: wrappedPath,
      params: [],
      hash: undefined,
      escaped: determineEscaped(open),
      strip,
      loc,
    };
  }

  const path = parseExpr();
  const params = [];
  let hash = undefined;

  skipWs();
  while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) {
    if (isAtHash()) {
      // A hash is always last.
      hash = parseHash();
      break;
    }
    params.push(parseExpr());
    skipWs();
  }

  const rightStrip = consumeClose();
  const loc = locFrom(open.start);
  const strip = { open: open.leftStrip || false, close: rightStrip };

  return {
    type,
    path,
    params,
    hash,
    escaped: determineEscaped(open),
    strip,
    loc,
  };
}
/**
 * Decide whether a mustache opener produces escaped output.
 *
 * Output is unescaped when the descriptor says so (`unescaped` flag or kind
 * 'unescaped'), or when the raw opener text has `{` or `&` right after `{{`
 * or `{{~`.
 *
 * @param {object} open opener descriptor (`unescaped`, `kind`, `raw`)
 * @returns {boolean} true when the output should be escaped
 */
function determineEscaped(open) {
  if (open.unescaped || open.kind === 'unescaped') {
    return false;
  }

  const raw = open.raw || '';
  // The sigil sits at index 2, or at index 3 when a `~` strip flag precedes it.
  const sigilIndex = raw.charAt(2) === '~' ? 3 : 2;
  const sigil = raw.charAt(sigilIndex);
  return !(sigil === '{' || sigil === '&');
}

/**
 * Parse the remainder of a `{{{...}}}` mustache whose opener has already been
 * consumed: path, positional params, optional trailing hash, then the `}}}`
 * closer.
 *
 * @param {object} open opener descriptor from `consumeOpen`
 * @returns {object} a `MustacheStatement` node with `escaped: false`
 */
function parseUnescapedMustache(open) {
  // True when the cursor is on `}}}` or on a `~}` closer prefix.
  const atCloser = () =>
    (cc() === CH_RBRACE && cc(1) === CH_RBRACE && cc(2) === CH_RBRACE) ||
    (cc() === CH_TILDE && cc(1) === CH_RBRACE);

  skipWs();
  const path = parseExpr();
  const params = [];
  let hash = undefined;

  skipWs();
  while (pos < len && !atCloser()) {
    if (isAtHash()) {
      // A hash is always last.
      hash = parseHash();
      break;
    }
    params.push(parseExpr());
    skipWs();
  }

  const closeStrip = consumeUnescapedClose();
  const loc = locFrom(open.start);

  return {
    type: 'MustacheStatement',
    path,
    params,
    hash,
    escaped: false,
    strip: { open: open.leftStrip || false, close: closeStrip },
    loc,
  };
}

// === Block parsing ===
function parseBlock(open) { + skipWs(); + const path = parseExpr(); + const params = []; + let hash = undefined; + let blockParams = undefined; + + skipWs(); + while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) { + // Check for block params (as |...|) + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + const bp = parseBlockParams(); + if (bp) { + blockParams = bp; + break; + } + } + if (isAtHash()) { + hash = parseHash(); + skipWs(); + // Still check for block params after hash + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + blockParams = parseBlockParams(); + } + break; + } + params.push(parseExpr()); + skipWs(); + } + + const rightStrip = consumeClose(); + const openInfo = { + open: open.raw, + path, + params, + hash, + blockParams, + strip: { open: open.leftStrip || false, close: rightStrip }, + }; + + // Parse the block body + const program = parseProgram(['close', 'inverse']); + + // Check for inverse + let inverseAndProgram = undefined; + if (pos < len && startsWith('{{')) { + const savedPos = pos; + const savedLine = line; + const savedCol = col; + const nextOpen = consumeOpen(); + + if (nextOpen.kind === 'inverse') { + const inverseProgram = parseProgram(['close']); + inverseAndProgram = { strip: nextOpen.strip, program: inverseProgram }; + } else if (nextOpen.kind === 'inverseChain') { + inverseAndProgram = parseInverseChain(nextOpen); + } else if (nextOpen.kind === 'close') { + // Restore — close will be parsed below + pos = savedPos; + line = savedLine; + col = savedCol; + } else { + pos = savedPos; + line = savedLine; + col = savedCol; + } + } + + // Parse close block + const close = parseCloseBlock(path); + + return buildBlock(openInfo, program, inverseAndProgram, close, false, open.start); + } + + function parseInverseBlock(open) { + // Same as parseBlock but with inverted=true + skipWs(); + const path = parseExpr(); + const params = []; + let hash = undefined; + let 
blockParams = undefined; + + skipWs(); + while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) { + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + const bp = parseBlockParams(); + if (bp) { + blockParams = bp; + break; + } + } + if (isAtHash()) { + hash = parseHash(); + skipWs(); + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + blockParams = parseBlockParams(); + } + break; + } + params.push(parseExpr()); + skipWs(); + } + + const rightStrip = consumeClose(); + const openInfo = { + path, + params, + hash, + blockParams, + strip: { open: open.leftStrip || false, close: rightStrip }, + }; + + const program = parseProgram(['close', 'inverse']); + + let inverseAndProgram = undefined; + if (pos < len && startsWith('{{')) { + const savedPos = pos; + const savedLine = line; + const savedCol = col; + const nextOpen = consumeOpen(); + + if (nextOpen.kind === 'inverse') { + const inverseProgram = parseProgram(['close']); + inverseAndProgram = { strip: nextOpen.strip, program: inverseProgram }; + } else if (nextOpen.kind === 'inverseChain') { + inverseAndProgram = parseInverseChain(nextOpen); + } else { + pos = savedPos; + line = savedLine; + col = savedCol; + } + } + + const close = parseCloseBlock(path); + + return buildBlock(openInfo, program, inverseAndProgram, close, true, open.start); + } + + function parseInverseChain(chainOpen) { + // chainOpen is an inverseChain opener ({{else if ...}}) + skipWs(); + const path = parseExpr(); + const params = []; + let hash = undefined; + let blockParams = undefined; + + skipWs(); + while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) { + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + const bp = parseBlockParams(); + if (bp) { + blockParams = bp; + break; + } + } + if (isAtHash()) { + hash = parseHash(); + skipWs(); + if (startsWith('as') && isWhitespace(input.charCodeAt(pos + 2))) { + blockParams = 
parseBlockParams(); + } + break; + } + params.push(parseExpr()); + skipWs(); + } + + const rightStrip = consumeClose(); + const openInfo = { + open: chainOpen.raw, + path, + params, + hash, + blockParams, + strip: { open: chainOpen.leftStrip || false, close: rightStrip }, + }; + + const program = parseProgram(['close', 'inverse']); + + let nestedInverse = undefined; + if (pos < len && startsWith('{{')) { + const savedPos = pos; + const savedLine = line; + const savedCol = col; + const nextOpen = consumeOpen(); + + if (nextOpen.kind === 'inverse') { + const inverseProgram = parseProgram(['close']); + nestedInverse = { strip: nextOpen.strip, program: inverseProgram }; + } else if (nextOpen.kind === 'inverseChain') { + nestedInverse = parseInverseChain(nextOpen); + } else { + pos = savedPos; + line = savedLine; + col = savedCol; + } + } + + // Build the inner block (using close = nestedInverse's last close or the parent's) + // The close strip for chained blocks comes from the parent's close block + const innerBlock = buildBlock(openInfo, program, nestedInverse, nestedInverse, false, chainOpen.start); + + const wrapperProgram = prepareProgram([innerBlock], program.loc); + wrapperProgram.chained = true; + + return { strip: openInfo.strip, program: wrapperProgram, chain: true }; + } + + function parseCloseBlock(openPath) { + if (!startsWith('{{')) error('Expected close block'); + const open = consumeOpen(); + if (open.kind !== 'close') error('Expected close block'); + + skipWs(); + const closePath = parseExpr(); + const rightStrip = consumeClose(); + + // Validate close matches open + const openName = openPath.original || openPath.parts?.join?.('/') || ''; + const closeName = closePath.original || closePath.parts?.join?.('/') || ''; + if (openName !== closeName) { + throw new Exception(openName + " doesn't match " + closeName, { loc: openPath.loc }); + } + + return { path: closePath, strip: { open: open.leftStrip || false, close: rightStrip } }; + } + + function 
buildBlock(openInfo, program, inverseAndProgram, close, inverted, startPos) { + const isDecorator = openInfo.open ? /\*/.test(openInfo.open) : false; + + program.blockParams = openInfo.blockParams; + + let inverse, inverseStrip; + + if (inverseAndProgram) { + if (isDecorator) { + throw new Exception('Unexpected inverse block on decorator', inverseAndProgram); + } + + if (inverseAndProgram.chain) { + inverseAndProgram.program.body[0].closeStrip = close && close.strip; + } + + inverseStrip = inverseAndProgram.strip; + inverse = inverseAndProgram.program; + } + + if (inverted) { + const tmp = inverse; + inverse = program; + program = tmp; + } + + return { + type: isDecorator ? 'DecoratorBlock' : 'BlockStatement', + path: openInfo.path, + params: openInfo.params, + hash: openInfo.hash, + program, + inverse, + openStrip: openInfo.strip, + inverseStrip, + closeStrip: close && close.strip, + loc: locFrom(startPos), + }; + } + + // === Raw block === + + function parseRawBlock(open) { + skipWs(); + const path = parseExpr(); + const params = []; + let hash = undefined; + + skipWs(); + while (pos < len && !(cc() === CH_RBRACE && cc(1) === CH_RBRACE && cc(2) === CH_RBRACE && cc(3) === CH_RBRACE)) { + if (isAtHash()) { + hash = parseHash(); + break; + } + params.push(parseExpr()); + skipWs(); + } + + // Consume }}}} + if (!startsWith('}}}}')) error("Expected '}}}}' to close raw block"); + advanceTo(pos + 4); + + // Scan raw content until {{{{/openName}}}} + // In the Jison 'raw' state, EVERYTHING is content except {{{{/name}}}}. + // Nested {{{{ (not followed by /) is also content. + // We track a nesting depth: {{{{ pushes, {{{{/name}}}} pops. 
+ const openName = path.original || path.parts?.join?.('/') || ''; + const contents = []; + let rawDepth = 1; // we're inside one raw block + + while (pos < len) { + const idx = input.indexOf('{{{{', pos); + if (idx === -1) error('Unterminated raw block'); + + // Content before {{{{ + if (idx > pos) { + const contentStart = savePos(); + const text = input.substring(pos, idx); + advanceTo(idx); + contents.push({ + type: 'ContentStatement', + original: text, + value: text, + loc: locFrom(contentStart), + }); + } + + // Check if it's {{{{/ (potential close) + if (input.charCodeAt(idx + 4) === CH_SLASH) { + // Try to match {{{{/openName}}}} + const closeStart = idx + 5; + let closeEnd = closeStart; + while (closeEnd < len && isIdChar(input.charCodeAt(closeEnd))) closeEnd++; + const closeId = input.substring(closeStart, closeEnd); + + if (input.startsWith('}}}}', closeEnd)) { + if (rawDepth === 1) { + if (closeId === openName) { + // This is our close tag + advanceTo(closeEnd + 4); + + // Build the raw block — Jison uses the overall block loc for program too + const loc = locFrom(open.start); + const program = { + type: 'Program', + body: contents, + strip: {}, + loc, + }; + + return { + type: 'BlockStatement', + path, + params, + hash, + program, + openStrip: {}, + inverseStrip: {}, + closeStrip: {}, + loc, + }; + } + // Mismatch: close tag doesn't match open + throw new Exception(openName + " doesn't match " + closeId, { loc: path.loc }); + } + + if (closeId) { + // It's a close for a nested raw block — just decrement depth and treat as content + rawDepth--; + } + } + + // Not our close — treat {{{{/...}}}} as content + const contentStart = savePos(); + const endOfTag = closeEnd + (input.startsWith('}}}}', closeEnd) ? 
4 : 0); + const text = input.substring(idx, endOfTag || idx + 5); + advanceTo(endOfTag || idx + 5); + contents.push({ + type: 'ContentStatement', + original: text, + value: text, + loc: locFrom(contentStart), + }); + } else { + // {{{{ not followed by / — nested raw block opener, treat as content + rawDepth++; + const contentStart = savePos(); + advanceTo(idx + 4); + const text = '{{{{'; + contents.push({ + type: 'ContentStatement', + original: text, + value: text, + loc: locFrom(contentStart), + }); + } + } + + error('Unterminated raw block'); + } + + // === Partial === + + function parsePartial(open) { + skipWs(); + const name = parseExpr(); + const params = []; + let hash = undefined; + + skipWs(); + while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) { + if (isAtHash()) { + hash = parseHash(); + break; + } + params.push(parseExpr()); + skipWs(); + } + + const rightStrip = consumeClose(); + + return { + type: 'PartialStatement', + name, + params, + hash, + indent: '', + strip: { open: open.leftStrip || false, close: rightStrip }, + loc: locFrom(open.start), + }; + } + + function parsePartialBlock(open) { + skipWs(); + const name = parseExpr(); + const params = []; + let hash = undefined; + + skipWs(); + while (pos < len && cc() !== CH_RBRACE && !(cc() === CH_TILDE && cc(1) === CH_RBRACE)) { + if (isAtHash()) { + hash = parseHash(); + break; + } + params.push(parseExpr()); + skipWs(); + } + + const rightStrip = consumeClose(); + + const openInfo = { + path: name, + params, + hash, + strip: { open: open.leftStrip || false, close: rightStrip }, + }; + + const program = parseProgram(['close']); + const close = parseCloseBlock(name); + + return { + type: 'PartialBlockStatement', + name: openInfo.path, + params: openInfo.params, + hash: openInfo.hash, + program, + openStrip: openInfo.strip, + closeStrip: close && close.strip, + loc: locFrom(open.start), + }; + } + + // === Program / root === + + function prepareProgram(statements, 
loc) { + if (!loc && statements.length) { + const firstLoc = statements[0].loc; + const lastLoc = statements[statements.length - 1].loc; + if (firstLoc && lastLoc) { + loc = { + source: firstLoc.source, + start: { line: firstLoc.start.line, column: firstLoc.start.column }, + end: { line: lastLoc.end.line, column: lastLoc.end.column }, + }; + } + } + return { type: 'Program', body: statements, strip: {}, loc: loc || undefined }; + } + + // === Entry point === + const result = parseProgram(null); + + if (pos < len) { + error('Unexpected content after end of template'); + } + + return result; +} + +function arrayLiteralNode(array, loc) { + return { type: 'ArrayLiteral', items: array, loc }; +} + +function hashLiteralNode(hash, loc) { + return { type: 'HashLiteral', pairs: hash.pairs, loc }; +} diff --git a/packages/@handlebars/parser/stress-test-2.mjs b/packages/@handlebars/parser/stress-test-2.mjs new file mode 100644 index 00000000000..f08424283b2 --- /dev/null +++ b/packages/@handlebars/parser/stress-test-2.mjs @@ -0,0 +1,329 @@ +/** + * Stress test round 2: harder edge cases, pathological inputs, + * real-world Ember patterns, and fuzz-like combinations. 
+ */ +import { v2ParseWithoutProcessing as parse } from './lib/v2-parser.js'; + +let passed = 0, failed = 0; + +function test(tpl, label) { + try { + const ast = parse(tpl); + if (!ast || ast.type !== 'Program') { + console.log(`FAIL [${label}]: got ${ast?.type}`); + failed++; return; + } + passed++; + } catch(e) { + console.log(`FAIL [${label}]: ${e.message?.substring(0,80)}`); + console.log(` template: ${JSON.stringify(tpl).substring(0,80)}`); + failed++; + } +} + +function testError(tpl, label) { + try { + parse(tpl); + console.log(`FAIL [${label}]: expected error but parsed OK`); + console.log(` template: ${JSON.stringify(tpl).substring(0,80)}`); + failed++; + } catch(e) { + passed++; + } +} + +console.log('=== ROUND 2: TRYING TO BREAK IT ===\n'); + +// ===================================================================== +// 1. PATHOLOGICAL / STRESS INPUTS +// ===================================================================== +test('{{a}}{{b}}{{c}}{{d}}{{e}}{{f}}{{g}}{{h}}{{i}}{{j}}{{k}}{{l}}{{m}}{{n}}{{o}}{{p}}', '16 adjacent mustaches'); +test('{{a}}' .repeat(100), '100 adjacent mustaches'); +test('x'.repeat(10000) + '{{foo}}', '10K content then mustache'); +test('{{foo}}' + 'x'.repeat(10000), 'mustache then 10K content'); +test('x'.repeat(100000), '100K content no mustaches'); + +// Deep nesting +let deepBlock = ''; +for (let i = 0; i < 50; i++) deepBlock += `{{#a${i}}}`; +deepBlock += 'x'; +for (let i = 49; i >= 0; i--) deepBlock += `{{/a${i}}}`; +test(deepBlock, '50-deep nested blocks'); + +let deepSexpr = '{{'; +for (let i = 0; i < 20; i++) deepSexpr += '(foo '; +deepSexpr += 'bar'; +for (let i = 0; i < 20; i++) deepSexpr += ')'; +deepSexpr += '}}'; +test(deepSexpr, '20-deep nested sub-expressions'); + +// Many params +test('{{foo ' + Array.from({length: 50}, (_, i) => `p${i}`).join(' ') + '}}', '50 params'); +test('{{foo ' + Array.from({length: 50}, (_, i) => `k${i}=v${i}`).join(' ') + '}}', '50 hash pairs'); + +// 
===================================================================== +// 2. BOUNDARY CONDITIONS — MINIMAL/EMPTY VARIANTS +// ===================================================================== +testError('{{}}', 'empty mustache — should error'); +testError('{{~}}', 'just strip in mustache'); +testError('{{~ ~}}', 'strips with whitespace only'); +test('{{!}}', 'empty short comment'); +test('{{!-- --}}', 'long comment with only spaces'); +test('{{!----}}', 'long comment empty body'); +test('{{#foo}}{{/foo}}', 'empty block body'); +test('{{#foo}} {{/foo}}', 'block with whitespace body'); +test('{{#foo}}\n{{/foo}}', 'block with newline body'); +test('{{#foo}}{{else}}{{/foo}}', 'block empty both branches'); +test('{{{{raw}}}}{{{{/raw}}}}', 'empty raw block'); + +// ===================================================================== +// 3. ESCAPED MUSTACHES — ROUND 2 (the area where we found the hang) +// ===================================================================== +test('\\{{', 'bare escaped open'); +test('\\{{}}', 'escaped then close'); +test('\\{{foo}}\\{{bar}}', 'two escaped mustaches'); +test('text\\{{a}}middle\\{{b}}end', 'escaped with text between'); +test('\\\\{{foo}}after', 'double-escaped then content'); +test('\\\\\\{{foo}}', 'triple backslash before {{'); +test('x\\{{y\\{{z', 'multiple escaped no close'); +test('\\{{\\{{\\{{', 'triple escaped open'); + +// ===================================================================== +// 4. LINE ENDING VARIANTS +// ===================================================================== +test('line1\nline2\n{{foo}}\nline4', 'LF line endings'); +test('line1\r\nline2\r\n{{foo}}\r\nline4', 'CRLF line endings'); +test('line1\rline2\r{{foo}}\rline4', 'CR-only line endings'); +test('mixed\n\r\n\r{{foo}}', 'mixed line endings'); +test('{{#foo}}\r\n content\r\n{{/foo}}', 'CRLF in block'); + +// ===================================================================== +// 5. 
UNICODE STRESS +// ===================================================================== +test('{{emoji-🎉}}', 'emoji in id (if valid)'); +test('{{foo "🎉 hello 世界"}}', 'emoji in string param'); +test('{{foo "\\""}}', 'escaped quote in string'); +test('{{"multi\nline"}}', 'newline in string'); +test("{{foo 'it\\'s'}}", 'apostrophe escaped'); +test('{{foo "say \\"hello\\""}}', 'multiple escaped quotes'); + +// ===================================================================== +// 6. KEYWORDS AS ESCAPED IDENTIFIERS +// ===================================================================== +test('{{[if]}}', 'escaped keyword if'); +test('{{[else]}}', 'escaped keyword else'); +test('{{[each]}}', 'escaped keyword each'); +test('{{[true]}}', 'escaped keyword true'); +test('{{[false]}}', 'escaped keyword false'); +test('{{[null]}}', 'escaped keyword null'); +test('{{[undefined]}}', 'escaped keyword undefined'); +test('{{[as]}}', 'escaped keyword as'); +test('{{foo.[if].bar}}', 'escaped keyword in path'); +test('{{foo [if]=bar}}', 'escaped keyword as hash key'); + +// ===================================================================== +// 7. STRIP FLAGS — EXHAUSTIVE COMBOS WITH BLOCKS +// ===================================================================== +test('{{~#foo~}}{{~/foo~}}', 'block all strip empty'); +test('{{~#foo}}content{{/foo~}}', 'block strip open-left close-right'); +test('{{#foo~}}content{{~/foo}}', 'block strip open-right close-left'); +test('{{~#foo~}}x{{~else~}}y{{~/foo~}}', 'block+else all strip'); +test('{{~#foo~}}x{{~^~}}y{{~/foo~}}', 'block+caret all strip'); +test('{{~#foo as |x|~}}{{x}}{{~/foo~}}', 'block params all strip'); +test('{{~> partial~}}', 'partial both strip'); +test('{{~#> partial~}}x{{~/partial~}}', 'partial block both strip'); + +// ===================================================================== +// 8. 
COMPLEX REAL-WORLD PATTERNS +// ===================================================================== +test(` + +`.trim(), 'real: complex nav component'); + +test(` +{{#let + (hash + title=@model.title + description=@model.description + tags=(if @model.tags @model.tags (array)) + author=(hash + name=@model.author.name + avatar=@model.author.avatar + ) + ) + as |data| +}} +
+

{{data.title}}

+

{{data.description}}

+ {{#each data.tags as |tag|}} + {{tag}} + {{/each}} +
+ {{data.author.name}} + {{data.author.name}} +
+
+{{/let}} +`.trim(), 'real: let with complex hash'); + +test(` +{{#each @rows as |row rowIndex|}} + + {{#each @columns as |column colIndex|}} + + {{get (get @data rowIndex) column.key}} + + {{/each}} + +{{/each}} +`.trim(), 'real: data grid component'); + +test(` +{{! This is a file upload component }} +{{!-- + It supports drag and drop, file selection, + and previewing uploaded files. + @param {Array} @files - current files + @param {Function} @onUpload - upload handler +--}} +
+ {{#if @files.length}} + {{#each @files as |file|}} +
+ {{#if (eq file.type "image")}} + {{file.name}} + {{else}} + {{file.extension}} + {{/if}} + {{file.name}} + +
+ {{/each}} + {{else}} +

{{t "upload.dropzone"}}

+ {{/if}} +
+`.trim(), 'real: file upload component'); + +// ===================================================================== +// 9. TRICKY CLOSE/OPEN PATTERNS +// ===================================================================== +test('}}{{foo}}', 'stray close then real mustache'); +test('}}}}{{foo}}', 'double stray close then mustache'); +test('}}}{{foo}}', 'triple close before mustache'); +test('{{foo}}}}', 'mustache then stray close'); +test('{{{foo}}}}}', 'triple stache then extra braces'); +test('{{foo}}{', 'mustache then single brace'); +test('}{{foo}}', 'single close then mustache'); + +// ===================================================================== +// 10. COMMENTS WITH TRICKY CONTENT +// ===================================================================== +test('{{!-- }} --}}', 'long comment with }} inside'); +test('{{!-- {{ --}}', 'long comment with {{ inside'); +test('{{!-- {{#if x}} --}}', 'long comment with block inside'); +test('{{!-- {{!-- nested --}} --}}', 'comment with comment-like inside'); +test('{{! }} }}', 'short comment with }}'); +test('{{!-- \n\n\n --}}', 'long comment with blank lines'); +test('before{{!-- mid --}}after', 'comment between content'); +test('{{foo}}{{!-- between --}}{{bar}}', 'comment between mustaches'); + +// ===================================================================== +// 11. HASH-ONLY MUSTACHES (the {{key=val}} syntax) +// ===================================================================== +test('{{a=b}}', 'hash-only single pair'); +test('{{a=b c=d e=f}}', 'hash-only multiple pairs'); +test('{{a=(foo bar)}}', 'hash-only with sub-expr value'); +test('{{a="string" b=123 c=true d=null}}', 'hash-only mixed value types'); + +// ===================================================================== +// 12. PARTIAL EDGE CASES +// ===================================================================== +test('{{> (lookup . 
"partialName")}}', 'dynamic partial name'); +testError('{{> foo as |bar|}}', 'partial with as — invalid syntax'); + +// ===================================================================== +// 13. ELSE CHAIN STRESS +// ===================================================================== +test('{{#if a}}1{{else if b}}2{{else if c}}3{{else if d}}4{{else if e}}5{{else}}6{{/if}}', '5 else-if chains'); +test('{{#if a}}\n {{#if b}}\n inner\n {{else}}\n else-inner\n {{/if}}\n{{else if c}}\n chain\n{{else}}\n final\n{{/if}}', 'nested blocks in else chain'); + +// ===================================================================== +// 14. PATH EXPRESSION EDGE CASES +// ===================================================================== +test('{{foo.bar.baz.qux.quux.corge.grault.garply}}', '8-segment path'); +test('{{@index}}', 'common data: @index'); +test('{{@key}}', 'common data: @key'); +test('{{@first}}', 'common data: @first'); +test('{{@last}}', 'common data: @last'); +test('{{@root.foo}}', 'data root path'); +test('{{this.this}}', 'this.this'); +test('{{../this}}', 'parent this'); +test('{{this.[foo bar]}}', 'this with escaped segment'); +test('{{foo.[0]}}', 'numeric-looking escaped segment'); +test('{{foo.[class]}}', 'reserved-word escaped segment'); + +// ===================================================================== +// 15. NUMBER EDGE CASES +// ===================================================================== +test('{{foo 0}}', 'zero param'); +test('{{foo -0}}', 'negative zero'); +test('{{foo 999999999}}', 'large number'); +test('{{foo -999999999}}', 'large negative'); +test('{{foo 1.0}}', 'float one'); +test('{{foo 0.001}}', 'small float'); +test('{{foo 3.14159265}}', 'pi-ish'); + +// ===================================================================== +// 16. 
WHITESPACE IN UNUSUAL PLACES +// ===================================================================== +testError('{{ # foo }}x{{ / foo }}', 'spaces around # — invalid (Jison also rejects)'); +testError('{{ > foo }}', 'space before > — invalid (Jison also rejects)'); +test('{{ ! comment }}', 'space before !'); +test('{{ foo bar = baz }}', 'spaces around = in hash'); + +// ===================================================================== +// RESULTS +// ===================================================================== +console.log(`\n${'='.repeat(60)}`); +console.log(`${passed} passed, ${failed} failed out of ${passed + failed}`); +console.log(`${'='.repeat(60)}`); diff --git a/packages/@handlebars/parser/stress-test-3.mjs b/packages/@handlebars/parser/stress-test-3.mjs new file mode 100644 index 00000000000..7476ecefc8c --- /dev/null +++ b/packages/@handlebars/parser/stress-test-3.mjs @@ -0,0 +1,280 @@ +/** + * Stress test round 3: + * 1. Parse ALL .hbs/.gts/.gjs across every project in ~/real-world-project + * 2. Adversarial fuzzing — generated templates with random combinations + * 3. 
Pathological patterns designed to break recursive descent parsers + */ +import { readFileSync } from 'node:fs'; +import { execSync } from 'node:child_process'; +import jisonParser from './lib/parser.js'; +import * as Helpers from './lib/helpers.js'; +import { v2ParseWithoutProcessing } from './lib/v2-parser.js'; + +let b = {}; +for (let h in Helpers) { if (Object.prototype.hasOwnProperty.call(Helpers, h)) b[h] = Helpers[h]; } +function jison(input) { + jisonParser.yy = b; + jisonParser.yy.locInfo = l => new Helpers.SourceLocation(undefined, l); + jisonParser.yy.syntax = { square: 'string', hash: (h,l) => ({type:'HashLiteral',pairs:h.pairs,loc:l}) }; + return jisonParser.parse(input); +} + +let passed = 0, failed = 0, total = 0; +const failures = []; + +function compare(tpl, label) { + total++; + let j, v, jErr, vErr; + try { j = jison(tpl); } catch(e) { jErr = e; } + try { v = v2ParseWithoutProcessing(tpl); } catch(e) { vErr = e; } + if (jErr && vErr) { passed++; return; } + if (!!jErr !== !!vErr) { + failed++; + if (failures.length < 30) failures.push({ label, issue: 'error mismatch', jison: jErr ? 'ERR' : 'OK', v2: vErr ? 'ERR: ' + vErr.message?.substring(0,60) : 'OK' }); + return; + } + const jj = JSON.stringify(j), vj = JSON.stringify(v); + if (jj === vj) { passed++; return; } + failed++; + // Find diff point + let i = 0; + while (i < jj.length && i < vj.length && jj[i] === vj[i]) i++; + const strip = (k,v) => k === 'loc' || k === 'source' ? undefined : v; + const locOnly = JSON.stringify(j, strip) === JSON.stringify(v, strip); + if (failures.length < 30) failures.push({ + label, + issue: locOnly ? 
'LOC diff' : 'STRUCTURAL diff', + jison: jj.substring(Math.max(0,i-25), i+25), + v2: vj.substring(Math.max(0,i-25), i+25), + }); +} + +// ===================================================================== +// PART 1: All templates in ~/real-world-project +// ===================================================================== +console.log('=== PART 1: All templates in ~/real-world-project ===\n'); + +const allFiles = execSync( + 'find /Users/johanrd/real-world-project -name "*.hbs" -o -name "*.gts" -o -name "*.gjs" 2>/dev/null | grep -v node_modules | grep -v dist | grep -v tmp | grep -v .claude', + { encoding: 'utf8' } +).trim().split('\n').filter(Boolean); + +console.log(`Found ${allFiles.length} files`); + +let templateCount = 0; +for (const f of allFiles) { + try { + const content = readFileSync(f, 'utf8'); + const ext = f.split('.').pop(); + if (ext === 'hbs') { + templateCount++; + compare(content, f.replace(/.*\/real-world-project\//, '')); + } else { + // .gts/.gjs — extract + const regex = /