From 08dc6ec040ba71be215d233983154e54d73b3cb5 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 00:47:02 -0700 Subject: [PATCH 01/36] Run freebuff Windows build + smoke on every push MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tree-sitter wasm regression that crashed freebuff 0.0.62 only manifested on real Windows. CI was Linux-only, macOS dev machines behaved fine, and the Windows binary was only built+smoked at release time (cli-release-build.yml). So the bug shipped twice before being caught by user reports. Add a windows-latest job to freebuff-e2e.yml that builds the freebuff binary natively on Windows and runs the long smoke test against it. The full tmux-based e2e matrix can't follow — Windows runners don't ship tmux, and porting tmuxStart/tmuxSend would be substantial — but smoke-binary.ts catches the failure mode that bit us: it spawns the binary, waits long enough for the late renderer-cleanup rejection handler to fire, and asserts both that no fatal markers appeared and that the boot screen actually rendered. Mirrors the Windows-specific bits from cli-release-build.yml's build-windows-binary job: explicit `bun install --cwd cli` and the @opentui workspace symlink fix, both needed because bun workspace linking doesn't work reliably on Windows runners. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/freebuff-e2e.yml | 124 +++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/.github/workflows/freebuff-e2e.yml b/.github/workflows/freebuff-e2e.yml index e88c535fb0..a090ade3ab 100644 --- a/.github/workflows/freebuff-e2e.yml +++ b/.github/workflows/freebuff-e2e.yml @@ -55,6 +55,130 @@ jobs: path: cli/bin/freebuff retention-days: 1 + # Windows-native build + smoke. The full tmux-based e2e matrix below can't + # run here (Windows runners don't have tmux), but the smoke-binary.ts + # check is what would have caught the post-OpenTUI-upgrade tree-sitter + # wasm regression: that bug only manifested on real Windows, while CI was + # Linux-only and macOS dev machines saw it work. Now every push gets a + # real Windows boot test. + build-and-smoke-freebuff-windows: + runs-on: windows-latest + timeout-minutes: 20 + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - uses: ./.github/actions/setup-project + + - name: Ensure CLI dependencies + run: bun install --frozen-lockfile --cwd cli + shell: bash + + # Mirror the symlink fix from cli-release-build.yml's Windows job: bun + # workspace symlinks aren't created reliably on Windows runners, so + # the cli's @opentui imports need explicit junctions to the root + # @opentui packages. + - name: Fix OpenTUI module symlinks + shell: bash + run: | + set -euo pipefail + bun - <<'BUN' + import fs from 'fs'; + import path from 'path'; + + const rootDir = process.cwd(); + const rootOpenTui = path.join(rootDir, 'node_modules', '@opentui'); + const cliNodeModules = path.join(rootDir, 'cli', 'node_modules'); + const cliOpenTui = path.join(cliNodeModules, '@opentui'); + + if (!fs.existsSync(rootOpenTui)) { + console.log('Root @opentui packages missing; skipping fix'); + process.exit(0); + } + + fs.mkdirSync(cliOpenTui, { recursive: true }); + + const packages = ['core', 'react']; + for (const pkg of packages) { + const target = path.join(rootOpenTui, pkg); + const link = path.join(cliOpenTui, pkg); + + if (!fs.existsSync(target)) { + console.log(`Target ${target} missing; skipping ${pkg}`); + continue; + } + + let linkStats = null; + try { + linkStats = fs.lstatSync(link); + } catch (error) { + if (error?.code !== 'ENOENT') { + throw error; + } + } + + if (linkStats) { + let alreadyLinked = false; + try { + const actual = fs.realpathSync(link); + alreadyLinked = actual === target; + } catch { + // Broken symlink or unreadable target; we'll replace it. + } + + if (alreadyLinked) { + continue; + } + + fs.rmSync(link, { recursive: true, force: true }); + } + + const type = process.platform === 'win32' ? 'junction' : 'dir'; + try { + fs.symlinkSync(target, link, type); + console.log(`Linked ${link} -> ${target}`); + } catch (error) { + if (error?.code === 'EEXIST') { + fs.rmSync(link, { recursive: true, force: true }); + fs.symlinkSync(target, link, type); + console.log(`Re-linked ${link} -> ${target}`); + } else { + throw error; + } + } + } + BUN + + - name: Set environment variables + env: + SECRETS_CONTEXT: ${{ toJSON(secrets) }} + shell: bash + run: | + VAR_NAMES=$(bun scripts/generate-ci-env.ts --scope client) + echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" ' + to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value + ' >> $GITHUB_ENV + echo "FREEBUFF_MODE=true" >> $GITHUB_ENV + echo "NEXT_PUBLIC_CB_ENVIRONMENT=prod" >> $GITHUB_ENV + echo "CODEBUFF_GITHUB_ACTIONS=true" >> $GITHUB_ENV + + - name: Build Freebuff binary + run: bun freebuff/cli/build.ts 0.0.0-e2e + shell: bash + + - name: Smoke test binary + shell: bash + run: | + # --version exits via commander synchronously and won't see async + # startup failures (e.g. the Parser.init rejection from a broken + # tree-sitter wasm load). + ./cli/bin/freebuff.exe --version + # Run for several seconds so unhandled rejections during module + # init have time to fire — the freebuff 0.0.62 wasm regression + # surfaced through the *late* renderer-cleanup handler, after the + # boot screen had rendered, so a too-short window can miss it. + bun cli/scripts/smoke-binary.ts cli/bin/freebuff.exe + e2e: needs: build-freebuff runs-on: ubuntu-latest From 6b3dcd10bd9b15693765b2b24a3a0698ade6f33f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 07:58:03 +0000 Subject: [PATCH 02/36] Bump Freebuff version to 0.0.64 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 7df51e5e3a..bb8c2fe27e 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.63", + "version": "0.0.64", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 1b6333e46ea717c4d347dfce1f63efdf05f1394c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 01:25:44 -0700 Subject: [PATCH 03/36] Add --smoke-tree-sitter flag and fail builds with empty embed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freebuff 0.0.64 still crashed for users with the same wasm error even though it was built from a commit that contained the base64 embed. The runtime stack trace pointed at the path-resolution fallback in init-node.ts:76, meaning the embed didn't reach the SDK bundle's globalThis check at runtime — the binary fell through to fs.existsSync which never works on Windows bunfs paths. Two hardening passes so this can't ship silently again: - cli/src/pre-init/tree-sitter-wasm.ts: hidden `--smoke-tree-sitter` flag, handled in the very first import. Calls Parser.init({ wasmBinary }) directly with the embedded base64 and exits 0/1. Lives here (not commander) on purpose — it tests *the embed*, not the broader init path that has a path-resolution fallback that would mask a broken embed by passing in dev mode. - cli/scripts/build-binary.ts: post-bun-compile, scan the output binary for the wasm's base64 prefix. Build fails if the bytes didn't actually make it through bundling (e.g. bun dropping a huge string literal, bundle cache reading a stale empty stub). Always-on log of which path the wasm was resolved from so CI logs make the embed step diagnosable. More resilient resolve: search workspace root, cli/node_modules, and sdk/node_modules before falling back to createRequire — Windows CI's `bun install --cwd cli` lays out web-tree-sitter differently than a hoisted root install. - packages/code-map/src/init-node.ts: accept bunfs paths (`/~BUN/root/...`) without an fs.existsSync check. fs.existsSync inconsistently returns false for bun --compile asset paths on Windows even though the runtime can read them, so the existing path-resolution fallback was permanently broken on Windows. Belt-and-braces: this makes the fallback work even if the embed step regresses. - cli/scripts/smoke-binary.ts: run --smoke-tree-sitter as a deterministic pre-check before the long-window boot smoke. A broken embed fails fast with a clear "exit code 1, no boot ok marker" error instead of a 10s timeout that depends on render-loop timing. Verified locally: build embeds 205KB wasm as 274KB base64, post-build verification finds the prefix in the compiled binary, --smoke-tree-sitter exits 0 with "tree-sitter smoke ok", full smoke passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/scripts/build-binary.ts | 94 ++++++++++++++++++++++++---- cli/scripts/smoke-binary.ts | 36 +++++++++++ cli/src/pre-init/tree-sitter-wasm.ts | 46 ++++++++++++-- packages/code-map/src/init-node.ts | 12 +++- 4 files changed, 169 insertions(+), 19 deletions(-) diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index 44a7dd9570..4ccd1eeff3 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -145,10 +145,10 @@ async function main() { patchOpenTuiAssetPaths() await ensureOpenTuiNativeBundle(targetInfo) - const restoreTreeSitterWasmStub = embedTreeSitterWasmAsBase64() + const treeSitterEmbed = embedTreeSitterWasmAsBase64() // Restore the stub even on build failure so a developer's git working // tree doesn't end up with a multi-megabyte modified file. - process.on('exit', restoreTreeSitterWasmStub) + process.on('exit', treeSitterEmbed.restore) const outputFilename = targetInfo.platform === 'win32' ? `${binaryName}.exe` : binaryName @@ -194,7 +194,17 @@ async function main() { // Build done — restore the stub so a developer's working tree doesn't show // a multi-megabyte diff. (The exit handler above is a backstop for crashes; // the eager call here keeps a successful build clean.) - restoreTreeSitterWasmStub() + treeSitterEmbed.restore() + + // Fail the build if the wasm bytes didn't actually make it into the + // compiled binary. Catches silent regressions (e.g. bun dropping a huge + // string literal, or some future bundler optimization) before we ship a + // broken artifact to users. + verifyTreeSitterWasmEmbedded( + outputFile, + treeSitterEmbed.wasmBase64Prefix, + treeSitterEmbed.wasmByteLength, + ) if (targetInfo.platform !== 'win32') { chmodSync(outputFile, 0o755) @@ -225,7 +235,11 @@ main().catch((error: unknown) => { * Returns a function that restores the stub. Always invoke it (success or * failure) so a developer's working tree doesn't show a multi-MB diff. */ -function embedTreeSitterWasmAsBase64(): () => void { +function embedTreeSitterWasmAsBase64(): { + restore: () => void + wasmBase64Prefix: string + wasmByteLength: number +} { const stubPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter-wasm-bytes.ts') const originalStub = readFileSync(stubPath, 'utf8') let restored = false @@ -239,11 +253,30 @@ function embedTreeSitterWasmAsBase64(): () => void { } } - // Resolve from the CLI workspace so monorepo hoisting differences don't - // matter — `web-tree-sitter` is an SDK dep, but the CLI imports it - // transitively and the bundler walks it from here. - const cliRequire = createRequire(join(cliRoot, 'package.json')) - const wasmPath = cliRequire.resolve('web-tree-sitter/tree-sitter.wasm') + // Try multiple candidate locations because bun's hoisting differs by + // platform and install command — Windows CI does `bun install --cwd cli` + // which can leave web-tree-sitter in cli/node_modules, while monorepo + // root installs hoist it to ../node_modules. Fall back to createRequire + // last so any failure surfaces with the full search trail. + const candidates = [ + join(cliRoot, 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + join(cliRoot, '..', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + join(cliRoot, '..', 'sdk', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + ] + let wasmPath = candidates.find((p) => existsSync(p)) + if (!wasmPath) { + try { + const cliRequire = createRequire(join(cliRoot, 'package.json')) + wasmPath = cliRequire.resolve('web-tree-sitter/tree-sitter.wasm') + } catch (err) { + throw new Error( + `Could not locate web-tree-sitter/tree-sitter.wasm. Searched:\n - ` + + candidates.join('\n - ') + + `\nAnd createRequire failed: ${err instanceof Error ? err.message : String(err)}`, + ) + } + } + const wasmBytes = readFileSync(wasmPath) const base64 = wasmBytes.toString('base64') @@ -254,8 +287,47 @@ function embedTreeSitterWasmAsBase64(): () => void { `export const TREE_SITTER_WASM_BASE64 = ${JSON.stringify(base64)}\n` writeFileSync(stubPath, generated) - log(`Embedded tree-sitter.wasm (${wasmBytes.length} bytes → ${base64.length} chars base64)`) - return restore + // Always-on log (not behind VERBOSE) so CI shows which path was used and + // whether the embed succeeded — this is the single most useful breadcrumb + // when the runtime check fails on a user machine. + logAlways( + `Embedded tree-sitter.wasm from ${wasmPath} (${wasmBytes.length} bytes → ${base64.length} chars base64)`, + ) + return { + restore, + wasmBase64Prefix: base64.slice(0, 40), + wasmByteLength: wasmBytes.length, + } +} + +/** + * Sanity-check the compiled binary actually contains the embedded base64. + * If bun --compile ever silently drops a large string literal, or our embed + * step's file write didn't take effect before the bundle ran, we want the + * build to fail here instead of producing a binary that crashes for users. + */ +function verifyTreeSitterWasmEmbedded( + outputFile: string, + wasmBase64Prefix: string, + wasmByteLength: number, +): void { + const binary = readFileSync(outputFile) + // Search as a Buffer so we don't have to load the whole binary as a UTF-8 + // string (binaries are not valid UTF-8 and toString would corrupt bytes). + const needle = Buffer.from(wasmBase64Prefix, 'utf8') + const idx = binary.indexOf(needle) + if (idx === -1) { + throw new Error( + `Embedded tree-sitter wasm prefix not found in ${outputFile}.\n` + + `Expected base64 prefix (first 40 chars): ${wasmBase64Prefix}\n` + + `Original wasm size: ${wasmByteLength} bytes.\n` + + `This means the build-binary.ts embed step ran but bun --compile\n` + + `did not include the bytes in the output. The runtime smoke test\n` + + `would fall back to path-based wasm resolution, which is broken on\n` + + `Windows.`, + ) + } + logAlways(`Verified embedded wasm prefix at offset ${idx} of compiled binary.`) } function patchOpenTuiAssetPaths() { diff --git a/cli/scripts/smoke-binary.ts b/cli/scripts/smoke-binary.ts index e2bf9b779b..2553c87ef2 100644 --- a/cli/scripts/smoke-binary.ts +++ b/cli/scripts/smoke-binary.ts @@ -81,6 +81,39 @@ const FATAL_PATTERNS = [ // the renderer is up). const DEFAULT_RUN_SECONDS = 10 +function runTreeSitterSmoke(binary: string): Promise { + return new Promise((resolve, reject) => { + const proc = spawn(binary, ['--smoke-tree-sitter'], { + stdio: ['ignore', 'pipe', 'pipe'], + env: { ...process.env, NO_COLOR: '1', TERM: 'dumb' }, + }) + + let captured = '' + const append = (chunk: Buffer): void => { + captured += chunk.toString('utf8') + } + proc.stdout?.on('data', append) + proc.stderr?.on('data', append) + + proc.once('error', reject) + proc.once('exit', (code) => { + if (code === 0 && /tree-sitter smoke ok/.test(captured)) { + resolve() + return + } + + reject( + new Error( + `tree-sitter smoke failed with exit code ${code}\n${captured.slice( + 0, + 8 * 1024, + )}`, + ), + ) + }) + }) +} + async function main(): Promise { const binary = process.argv[2] const runSeconds = Number(process.argv[3] ?? DEFAULT_RUN_SECONDS) @@ -100,6 +133,9 @@ async function main(): Promise { console.log(`smoke-binary: spawning ${binary} for ${runSeconds}s…`) + await runTreeSitterSmoke(binary) + console.log('smoke-binary: tree-sitter init OK.') + const proc = spawn(binary, [], { stdio: ['ignore', 'pipe', 'pipe'], env: { ...process.env, NO_COLOR: '1', TERM: 'dumb' }, diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index c1f1837cd9..b6e54ce2fa 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -18,8 +18,10 @@ import { TREE_SITTER_WASM_BASE64 } from './tree-sitter-wasm-bytes' +let embeddedWasm: Uint8Array | undefined if (TREE_SITTER_WASM_BASE64.length > 0) { const buf = Buffer.from(TREE_SITTER_WASM_BASE64, 'base64') + embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) // globalThis is the only cross-bundle channel: the SDK pre-built bundle // inlines its own copy of `init-node.ts`, so a module-level variable in // the source package isn't visible to the singleton initialized via the @@ -27,9 +29,43 @@ if (TREE_SITTER_WASM_BASE64.length > 0) { // Buffer's shared underlying ArrayBuffer. ;( globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } - ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = new Uint8Array( - buf.buffer, - buf.byteOffset, - buf.byteLength, - ) + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm +} + +// Deterministic CI gate: ` --smoke-tree-sitter` proves the embed +// shipped end-to-end. Lives here, in the very first import, on purpose: +// +// - We're testing whether the *embed* works. Going through commander + +// initTreeSitterForNode would also pass via the path-resolution +// fallback when the embed is empty (e.g. dev mode), giving false +// positives that mask a broken production build. +// - Failing here, before any other module loads, gives a sharp signal: +// the embed either worked or it didn't. No render-loop timing, no +// commander wiring, no SDK init order to debug. +// +// Async IIFE because Parser.init returns a promise; process.exit tears +// the process down before parallel top-level imports can fire side +// effects we'd have to clean up. +if (process.argv.includes('--smoke-tree-sitter')) { + void (async () => { + try { + if (!embeddedWasm) { + console.error( + 'tree-sitter smoke FAIL: TREE_SITTER_WASM_BASE64 stub is empty — ' + + 'the build-binary.ts embed step did not run or did not write the file.', + ) + process.exit(1) + } + const { Parser } = await import('web-tree-sitter') + await Parser.init({ wasmBinary: embeddedWasm }) + // Marker grepped by cli/scripts/smoke-binary.ts — keep this exact text. + console.log( + `tree-sitter smoke ok (${embeddedWasm.byteLength} bytes wasm initialized)`, + ) + process.exit(0) + } catch (err) { + console.error('tree-sitter smoke FAIL:', err) + process.exit(1) + } + })() } diff --git a/packages/code-map/src/init-node.ts b/packages/code-map/src/init-node.ts index d46793f68c..e3927a0cfc 100644 --- a/packages/code-map/src/init-node.ts +++ b/packages/code-map/src/init-node.ts @@ -30,14 +30,20 @@ function getEmbeddedWasmBinary(): Uint8Array | undefined { )[WASM_BINARY_GLOBAL_KEY] } +function isBunEmbeddedPath(filePath: string): boolean { + return filePath.replace(/\\/g, '/').includes('/~BUN/root/') +} + function resolveTreeSitterWasm(scriptDir: string): string { const override = process.env[TREE_SITTER_WASM_ENV_VAR] - if (override && fs.existsSync(override)) { - return override + if (override) { + if (fs.existsSync(override) || isBunEmbeddedPath(override)) { + return override + } } const fallback = path.join(scriptDir, 'tree-sitter.wasm') - if (fs.existsSync(fallback)) { + if (fs.existsSync(fallback) || isBunEmbeddedPath(fallback)) { return fallback } From ad6a9004b4e4002c1d02a514ebf5674687ac63b1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 08:30:21 +0000 Subject: [PATCH 04/36] Bump version to 1.0.655 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index cfb51a6817..c2e5fd500f 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.654", + "version": "1.0.655", "description": "AI coding agent", "license": "MIT", "bin": { From 54c07293f4074bfc1924e4f099092892a9940ab1 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 01:46:01 -0700 Subject: [PATCH 05/36] Switch tree-sitter wasm embed from base64 string to `with { type: 'file' }` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The base64-in-source approach didn't survive `bun --compile` on Windows. The CI build's `verifyTreeSitterWasmEmbedded` step caught it: Embedded tree-sitter.wasm from D:\a\...\tree-sitter.wasm (205488 bytes → 273984 chars base64) [343ms] minify -16.58 MB Embedded tree-sitter wasm prefix not found in D:\a\...\codebuff.exe. So the embed step wrote the bytes to disk and bun read them, but the 274KB string literal didn't end up in the compiled output — likely tree-shaken or transformed by the minifier on Windows. The same code worked on macOS and Linux locally and in CI. Switch to Bun's documented asset-embed mechanism: import the wasm with `with { type: 'file' }`. Bun handles this through the bundler's asset pipeline rather than as a generic string literal, and the resulting binary contains the wasm bytes verbatim at a bunfs path. - cli/src/pre-init/tree-sitter-wasm.ts: import the wasm path, set the env var (for the locateFile fallback), and try a synchronous read so Parser.init can take the wasmBinary fast path. If the read throws (some Windows configurations have done this), log loudly so user reports include the diagnostic, then fall through to the locateFile flow — which init-node.ts now accepts bunfs paths through, even when fs.existsSync misreports them. - The --smoke-tree-sitter handler is now a top-level `await` instead of a fire-and-forget IIFE. Without that, commander.parse() ran synchronously in main() and failed on the unknown flag before the smoke handler could exit cleanly. - cli/scripts/build-binary.ts: drop the base64 stub-overwrite step entirely. New verifyTreeSitterWasmEmbedded reads a 64-byte chunk from the *middle* of the source wasm and asserts it appears in the compiled binary — that proves *this specific* tree-sitter.wasm shipped, not just any wasm (OpenTUI also embeds tree-sitter language wasms, so a magic-bytes-only scan would false-pass). - Delete cli/src/pre-init/tree-sitter-wasm-bytes.ts: no longer used. Verified locally: build embeds tree-sitter.wasm via the file-attribute import, post-build verification finds the source bytes at offset 77319353 of the compiled binary, --smoke-tree-sitter exits 0 with "tree-sitter smoke ok (wasmBinary, 205488 bytes)". Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/scripts/build-binary.ts | 131 ++++++--------------- cli/src/pre-init/tree-sitter-wasm-bytes.ts | 16 --- cli/src/pre-init/tree-sitter-wasm.ts | 126 ++++++++++++-------- 3 files changed, 113 insertions(+), 160 deletions(-) delete mode 100644 cli/src/pre-init/tree-sitter-wasm-bytes.ts diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index 4ccd1eeff3..d292ee918b 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -145,11 +145,6 @@ async function main() { patchOpenTuiAssetPaths() await ensureOpenTuiNativeBundle(targetInfo) - const treeSitterEmbed = embedTreeSitterWasmAsBase64() - // Restore the stub even on build failure so a developer's git working - // tree doesn't end up with a multi-megabyte modified file. - process.on('exit', treeSitterEmbed.restore) - const outputFilename = targetInfo.platform === 'win32' ? `${binaryName}.exe` : binaryName const outputFile = join(binDir, outputFilename) @@ -191,20 +186,12 @@ async function main() { runCommand('bun', buildArgs, { cwd: cliRoot }) - // Build done — restore the stub so a developer's working tree doesn't show - // a multi-megabyte diff. (The exit handler above is a backstop for crashes; - // the eager call here keeps a successful build clean.) - treeSitterEmbed.restore() - - // Fail the build if the wasm bytes didn't actually make it into the - // compiled binary. Catches silent regressions (e.g. bun dropping a huge - // string literal, or some future bundler optimization) before we ship a - // broken artifact to users. - verifyTreeSitterWasmEmbedded( - outputFile, - treeSitterEmbed.wasmBase64Prefix, - treeSitterEmbed.wasmByteLength, - ) + // Fail the build if the wasm asset didn't actually make it into the + // compiled binary. The pre-init imports tree-sitter.wasm with `with { + // type: 'file' }`, which Bun should embed; this scan catches silent + // regressions (e.g. tree-shaking eliminating the import) before we ship + // a broken artifact. + verifyTreeSitterWasmEmbedded(outputFile) if (targetInfo.platform !== 'win32') { chmodSync(outputFile, 0o755) @@ -225,39 +212,20 @@ main().catch((error: unknown) => { }) /** - * Inline the contents of `web-tree-sitter/tree-sitter.wasm` as a base64 string - * literal in `cli/src/pre-init/tree-sitter-wasm-bytes.ts`. The committed - * file is a stub; this overwrites it with the real bytes immediately before - * `bun build --compile`, so the bytes get baked into the binary's text - * segment instead of being placed at a bunfs path that has to be fs-read at - * runtime. + * Sanity-check the compiled binary actually contains web-tree-sitter's + * tree-sitter.wasm. The pre-init imports it via `with { type: 'file' }`, + * which should bundle the asset at a bunfs path. If tree-shaking or a + * future bundler change drops the import, the binary still compiles but + * tree-sitter init fails at runtime — this scan fails the build before + * we upload that artifact. * - * Returns a function that restores the stub. Always invoke it (success or - * failure) so a developer's working tree doesn't show a multi-MB diff. + * Looks for the actual wasm bytes (a unique 64-byte chunk pulled from + * the source file's interior), not just the wasm magic header — OpenTUI + * embeds its own tree-sitter language wasms, so a magic-bytes-only scan + * would false-pass even without our import. A literal bytes match + * proves *this specific* wasm shipped. */ -function embedTreeSitterWasmAsBase64(): { - restore: () => void - wasmBase64Prefix: string - wasmByteLength: number -} { - const stubPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter-wasm-bytes.ts') - const originalStub = readFileSync(stubPath, 'utf8') - let restored = false - const restore = (): void => { - if (restored) return - restored = true - try { - writeFileSync(stubPath, originalStub) - } catch (error) { - console.error('Failed to restore tree-sitter-wasm-bytes stub:', error) - } - } - - // Try multiple candidate locations because bun's hoisting differs by - // platform and install command — Windows CI does `bun install --cwd cli` - // which can leave web-tree-sitter in cli/node_modules, while monorepo - // root installs hoist it to ../node_modules. Fall back to createRequire - // last so any failure surfaces with the full search trail. +function verifyTreeSitterWasmEmbedded(outputFile: string): void { const candidates = [ join(cliRoot, 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), join(cliRoot, '..', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), @@ -270,64 +238,37 @@ function embedTreeSitterWasmAsBase64(): { wasmPath = cliRequire.resolve('web-tree-sitter/tree-sitter.wasm') } catch (err) { throw new Error( - `Could not locate web-tree-sitter/tree-sitter.wasm. Searched:\n - ` + + `Could not locate web-tree-sitter/tree-sitter.wasm to verify against. Searched:\n - ` + candidates.join('\n - ') + `\nAnd createRequire failed: ${err instanceof Error ? err.message : String(err)}`, ) } } - const wasmBytes = readFileSync(wasmPath) - const base64 = wasmBytes.toString('base64') - - const generated = - `// AUTO-GENERATED by cli/scripts/build-binary.ts during \`bun build --compile\`.\n` + - `// Restored to the empty stub after the build finishes — do not commit a\n` + - `// non-empty value here.\n` + - `export const TREE_SITTER_WASM_BASE64 = ${JSON.stringify(base64)}\n` - - writeFileSync(stubPath, generated) - // Always-on log (not behind VERBOSE) so CI shows which path was used and - // whether the embed succeeded — this is the single most useful breadcrumb - // when the runtime check fails on a user machine. - logAlways( - `Embedded tree-sitter.wasm from ${wasmPath} (${wasmBytes.length} bytes → ${base64.length} chars base64)`, - ) - return { - restore, - wasmBase64Prefix: base64.slice(0, 40), - wasmByteLength: wasmBytes.length, - } -} + const wasm = readFileSync(wasmPath) + // Take a 64-byte slice from the middle of the file. The header has + // generic wasm magic + section markers; the tail can be padding. The + // middle is densely packed code/data unique to this specific wasm + // module. + const needleStart = Math.floor(wasm.length / 2) + const needle = wasm.subarray(needleStart, needleStart + 64) -/** - * Sanity-check the compiled binary actually contains the embedded base64. - * If bun --compile ever silently drops a large string literal, or our embed - * step's file write didn't take effect before the bundle ran, we want the - * build to fail here instead of producing a binary that crashes for users. - */ -function verifyTreeSitterWasmEmbedded( - outputFile: string, - wasmBase64Prefix: string, - wasmByteLength: number, -): void { const binary = readFileSync(outputFile) - // Search as a Buffer so we don't have to load the whole binary as a UTF-8 - // string (binaries are not valid UTF-8 and toString would corrupt bytes). - const needle = Buffer.from(wasmBase64Prefix, 'utf8') const idx = binary.indexOf(needle) if (idx === -1) { throw new Error( - `Embedded tree-sitter wasm prefix not found in ${outputFile}.\n` + - `Expected base64 prefix (first 40 chars): ${wasmBase64Prefix}\n` + - `Original wasm size: ${wasmByteLength} bytes.\n` + - `This means the build-binary.ts embed step ran but bun --compile\n` + - `did not include the bytes in the output. The runtime smoke test\n` + - `would fall back to path-based wasm resolution, which is broken on\n` + - `Windows.`, + `web-tree-sitter wasm content not found in ${outputFile}.\n` + + `Source wasm: ${wasmPath} (${wasm.length} bytes)\n` + + `Searched for 64 bytes from offset ${needleStart} of the source.\n` + + `Either the \`with { type: 'file' }\` import in the pre-init was\n` + + `tree-shaken out, or bun --compile didn't embed the asset on this\n` + + `platform. The runtime tree-sitter init would fail with\n` + + `"Internal error: tree-sitter.wasm not found".`, ) } - logAlways(`Verified embedded wasm prefix at offset ${idx} of compiled binary.`) + logAlways( + `Verified embedded tree-sitter.wasm at offset ${idx} of compiled binary (source: ${wasmPath}).`, + ) } function patchOpenTuiAssetPaths() { diff --git a/cli/src/pre-init/tree-sitter-wasm-bytes.ts b/cli/src/pre-init/tree-sitter-wasm-bytes.ts deleted file mode 100644 index 71bf6c2a59..0000000000 --- a/cli/src/pre-init/tree-sitter-wasm-bytes.ts +++ /dev/null @@ -1,16 +0,0 @@ -// Stub committed for dev mode and tests. The real wasm bytes are inlined -// here as base64 by `cli/scripts/build-binary.ts` immediately before -// `bun build --compile`, then restored to the empty stub after the build -// completes. Dev mode and unit tests see the empty stub and fall back to -// path-based resolution in `packages/code-map/src/init-node.ts` (which -// works locally because `node_modules/web-tree-sitter/tree-sitter.wasm` -// exists on the filesystem). -// -// Why a string literal instead of `with { type: 'file' }` + readFileSync: -// the file-import approach left the bytes in bunfs and required a runtime -// fs read, which silently failed on Windows (`fs.readFileSync` for -// `B:\~BUN\root\...` paths) and let the singleton fall through to a -// path-based fallback that also failed there. A base64 string literal in -// the JS source compiles into the bun binary's text segment, with no -// filesystem step on the hot path. -export const TREE_SITTER_WASM_BASE64 = '' diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index b6e54ce2fa..c7c1c19f2d 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -1,71 +1,99 @@ // Embed tree-sitter.wasm into the bun-compile binary so the SDK's tree-sitter // parser singleton can find it at runtime. Must be the very first import in // `index.tsx`: subsequent imports (the SDK / code-map) eagerly construct the -// parser, and its init reads what we publish here on `globalThis`. +// parser, and its init reads what we publish here on `globalThis` and via +// the env var. // -// Why not `with { type: 'file' }` + a runtime fs read? That's what the prior -// fix tried, and it silently failed on Windows: bun --compile reports the -// embedded asset path as `B:\~BUN\root\...`, and on some Windows configs -// `fs.readFileSync` of that path throws (caught silently), so the SDK fell -// back to path-based resolution that also failed there. -// -// The base64 string in `tree-sitter-wasm-bytes.ts` is replaced with the real -// wasm contents by `cli/scripts/build-binary.ts` right before `bun build -// --compile` and restored after. The bytes end up in the binary's text -// segment as a JS string literal — no filesystem step on the hot path. In -// dev / unit tests the stub is empty and code-map falls back to the -// node_modules wasm, which works because the file actually exists locally. +// Why `with { type: 'file' }` rather than embedding base64 in TS source: +// the latter doesn't survive `bun --compile` on Windows. The base64 string +// gets dropped or transformed somewhere in the bundle/minify pipeline, so +// the runtime sees an empty stub even though the build script wrote the +// real bytes. `with { type: 'file' }` is Bun's documented asset-embed +// path — the file gets placed at a bunfs location the runtime can read. + +import { readFileSync } from 'fs' -import { TREE_SITTER_WASM_BASE64 } from './tree-sitter-wasm-bytes' +// @ts-expect-error - Bun's `with { type: 'file' }` returns a string path; TS +// has no loader for the .wasm subpath of web-tree-sitter's package exports. +import treeSitterWasmPath from 'web-tree-sitter/tree-sitter.wasm' with { + type: 'file', +} let embeddedWasm: Uint8Array | undefined -if (TREE_SITTER_WASM_BASE64.length > 0) { - const buf = Buffer.from(TREE_SITTER_WASM_BASE64, 'base64') - embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) - // globalThis is the only cross-bundle channel: the SDK pre-built bundle - // inlines its own copy of `init-node.ts`, so a module-level variable in - // the source package isn't visible to the singleton initialized via the - // SDK. Slice into a fresh Uint8Array view instead of handing over the - // Buffer's shared underlying ArrayBuffer. - ;( - globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } - ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm + +if (treeSitterWasmPath) { + // Path stays for the locateFile fallback in init-node.ts. That fallback + // accepts bunfs-style paths (`/~BUN/root/...`) without checking + // fs.existsSync, because fs.existsSync misreports those paths on Windows. + // emscripten's wasm loader will fs.readFile them through its own runtime. + process.env.CODEBUFF_TREE_SITTER_WASM_PATH = treeSitterWasmPath + + // Also try a synchronous read so we can hand the bytes straight to + // Parser.init via wasmBinary — bypassing locateFile entirely is the most + // robust path. If readFileSync of the bunfs path throws on this OS (we've + // seen this happen on Windows in some configurations), log it loudly so + // the smoke check / user reports include the diagnostic, then fall + // through to the locateFile flow. + try { + const buf = readFileSync(treeSitterWasmPath) + embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) + ;( + globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm + } catch (err) { + console.error( + '[tree-sitter pre-init] readFileSync failed for embedded wasm at', + treeSitterWasmPath, + '—', + err instanceof Error ? err.message : String(err), + ) + } } // Deterministic CI gate: ` --smoke-tree-sitter` proves the embed // shipped end-to-end. Lives here, in the very first import, on purpose: // // - We're testing whether the *embed* works. Going through commander + -// initTreeSitterForNode would also pass via the path-resolution -// fallback when the embed is empty (e.g. dev mode), giving false -// positives that mask a broken production build. +// initTreeSitterForNode would pass via the path-resolution fallback +// when the embed is empty (e.g. dev mode), giving false positives that +// mask a broken production build. // - Failing here, before any other module loads, gives a sharp signal: -// the embed either worked or it didn't. No render-loop timing, no -// commander wiring, no SDK init order to debug. +// either the wasm reached the runtime or it didn't. // -// Async IIFE because Parser.init returns a promise; process.exit tears -// the process down before parallel top-level imports can fire side -// effects we'd have to clean up. +// Top-level await (not a fire-and-forget IIFE) because subsequent module +// evaluation has to *wait* — otherwise `commander.parse()` runs first and +// fails on the unknown flag before our handler can exit cleanly. if (process.argv.includes('--smoke-tree-sitter')) { - void (async () => { - try { - if (!embeddedWasm) { - console.error( - 'tree-sitter smoke FAIL: TREE_SITTER_WASM_BASE64 stub is empty — ' + - 'the build-binary.ts embed step did not run or did not write the file.', - ) - process.exit(1) - } - const { Parser } = await import('web-tree-sitter') + try { + const { Parser } = await import('web-tree-sitter') + // Prefer the wasmBinary path (no filesystem step). Fall back to + // letting Parser.init resolve the path via its locateFile callback, + // which init-node.ts wires up to accept bunfs paths even when + // fs.existsSync says otherwise. + if (embeddedWasm) { await Parser.init({ wasmBinary: embeddedWasm }) - // Marker grepped by cli/scripts/smoke-binary.ts — keep this exact text. console.log( - `tree-sitter smoke ok (${embeddedWasm.byteLength} bytes wasm initialized)`, + `tree-sitter smoke ok (wasmBinary, ${embeddedWasm.byteLength} bytes)`, + ) + } else if (treeSitterWasmPath) { + await Parser.init({ + locateFile: (name: string) => + name === 'tree-sitter.wasm' ? treeSitterWasmPath : name, + }) + console.log( + `tree-sitter smoke ok (locateFile, path=${treeSitterWasmPath})`, + ) + } else { + console.error( + 'tree-sitter smoke FAIL: no embedded wasm path. The `with { type: ' + + "'file' }` import returned a falsy value, which means the bundler " + + 'did not embed the asset.', ) - process.exit(0) - } catch (err) { - console.error('tree-sitter smoke FAIL:', err) process.exit(1) } - })() + process.exit(0) + } catch (err) { + console.error('tree-sitter smoke FAIL:', err) + process.exit(1) + } } From ecdb374146053f01271ab8f6a21b2480034f90de Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 08:47:09 +0000 Subject: [PATCH 06/36] Bump version to 1.0.656 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index c2e5fd500f..37f3124bad 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.655", + "version": "1.0.656", "description": "AI coding agent", "license": "MIT", "bin": { From 01fefdadd3151d7dc6abd29d4b83d1c6d6b29d62 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 08:47:20 +0000 Subject: [PATCH 07/36] Bump Freebuff version to 0.0.65 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index bb8c2fe27e..c1fd94ec24 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.64", + "version": "0.0.65", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 7d5829478ef86da325cf3e75685e71e20bdefad0 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 01:55:23 -0700 Subject: [PATCH 08/36] Move --smoke-tree-sitter handler to main() to bypass commander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Last attempt put the handler at top-level in the pre-init module behind a top-level await, on the theory that ESM would pause subsequent module evaluation until it resolved. That worked on macOS locally but not on Windows in CI: smoke-binary: spawning ./codebuff.exe for 10s… error: tree-sitter smoke failed with exit code 1 error: unknown option '--smoke-tree-sitter' So commander.parse() ran before our handler exited, which means top-level await is not actually blocking parent-module evaluation in the bun --compile output on Windows (or it's getting transformed away by `--production` minification). Move the handler to the top of main() in cli/src/index.tsx, before parseArgs(). At that point commander hasn't run yet, so we can short- circuit cleanly. The pre-init module's only job is now to publish the embedded wasm bytes (globalThis) and path (env var); the handler reads those out of the same channels the production runtime uses. Verified locally: ./codebuff --smoke-tree-sitter prints "tree-sitter smoke ok (wasmBinary, 205488 bytes)" and exits 0; full smoke-binary.ts run passes both the tree-sitter pre-check and the boot-screen window. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/index.tsx | 38 +++++++++++++++++++ cli/src/pre-init/tree-sitter-wasm.ts | 55 ++++------------------------ 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 092fd0d1eb..05b0fdb160 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -186,6 +186,44 @@ function parseArgs(): ParsedArgs { } async function main(): Promise { + // CI gate: ` --smoke-tree-sitter` proves the embedded wasm boots + // through Parser.init end-to-end. Has to live BEFORE commander.parse() — + // an earlier attempt put this in a pre-init module with top-level await, + // and on Windows that didn't actually pause module evaluation (commander + // still ran first and rejected the unknown flag). + if (process.argv.includes('--smoke-tree-sitter')) { + const wasmBinary = ( + globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ + const wasmPath = process.env.CODEBUFF_TREE_SITTER_WASM_PATH + try { + const { Parser } = await import('web-tree-sitter') + if (wasmBinary) { + await Parser.init({ wasmBinary }) + // Marker grepped by cli/scripts/smoke-binary.ts — keep this exact text. + console.log( + `tree-sitter smoke ok (wasmBinary, ${wasmBinary.byteLength} bytes)`, + ) + } else if (wasmPath) { + await Parser.init({ + locateFile: (name: string) => + name === 'tree-sitter.wasm' ? wasmPath : name, + }) + console.log(`tree-sitter smoke ok (locateFile, path=${wasmPath})`) + } else { + console.error( + 'tree-sitter smoke FAIL: pre-init published neither globalThis bytes nor an env path. ' + + 'The `with { type: \'file\' }` import returned falsy.', + ) + process.exit(1) + } + process.exit(0) + } catch (err) { + console.error('tree-sitter smoke FAIL:', err) + process.exit(1) + } + } + // Run OSC theme detection BEFORE anything else. // This MUST happen before OpenTUI starts because OSC responses come through stdin, // and OpenTUI also listens to stdin. Running detection here ensures stdin is clean. diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index c7c1c19f2d..3d250cfd34 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -50,50 +50,11 @@ if (treeSitterWasmPath) { } } -// Deterministic CI gate: ` --smoke-tree-sitter` proves the embed -// shipped end-to-end. Lives here, in the very first import, on purpose: -// -// - We're testing whether the *embed* works. Going through commander + -// initTreeSitterForNode would pass via the path-resolution fallback -// when the embed is empty (e.g. dev mode), giving false positives that -// mask a broken production build. -// - Failing here, before any other module loads, gives a sharp signal: -// either the wasm reached the runtime or it didn't. -// -// Top-level await (not a fire-and-forget IIFE) because subsequent module -// evaluation has to *wait* — otherwise `commander.parse()` runs first and -// fails on the unknown flag before our handler can exit cleanly. -if (process.argv.includes('--smoke-tree-sitter')) { - try { - const { Parser } = await import('web-tree-sitter') - // Prefer the wasmBinary path (no filesystem step). Fall back to - // letting Parser.init resolve the path via its locateFile callback, - // which init-node.ts wires up to accept bunfs paths even when - // fs.existsSync says otherwise. - if (embeddedWasm) { - await Parser.init({ wasmBinary: embeddedWasm }) - console.log( - `tree-sitter smoke ok (wasmBinary, ${embeddedWasm.byteLength} bytes)`, - ) - } else if (treeSitterWasmPath) { - await Parser.init({ - locateFile: (name: string) => - name === 'tree-sitter.wasm' ? treeSitterWasmPath : name, - }) - console.log( - `tree-sitter smoke ok (locateFile, path=${treeSitterWasmPath})`, - ) - } else { - console.error( - 'tree-sitter smoke FAIL: no embedded wasm path. The `with { type: ' + - "'file' }` import returned a falsy value, which means the bundler " + - 'did not embed the asset.', - ) - process.exit(1) - } - process.exit(0) - } catch (err) { - console.error('tree-sitter smoke FAIL:', err) - process.exit(1) - } -} +// `--smoke-tree-sitter` is the deterministic CI gate. We can't handle it +// here with top-level await — bun --compile on Windows didn't preserve the +// blocking semantics in our last attempt, so commander still ran and +// rejected the unknown flag. Instead, the handler lives at the top of +// main() in cli/src/index.tsx (before parseArgs), where we can synchronously +// short-circuit before commander parses argv. This module's job is just to +// publish the wasm bytes / path on globalThis + process.env so that the +// handler (and the SDK's eager Parser.init) can find them. From b1bd842c69c91cf297d3b35405917471ff4d8c4f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 08:56:25 +0000 Subject: [PATCH 09/36] Bump version to 1.0.657 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 37f3124bad..b62621d4e2 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.656", + "version": "1.0.657", "description": "AI coding agent", "license": "MIT", "bin": { From f9f207a0a9b799d0ab7c5dacea2420c73fc17b39 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:04:32 -0700 Subject: [PATCH 10/36] Stage tree-sitter.wasm into pre-init/ for relative `with { type: 'file' }` On Windows, bun --compile bundles the wasm bytes (build verification finds them at a known offset) but the JS-level binding from a node_modules subpath import returns falsy at runtime: import wasmPath from 'web-tree-sitter/tree-sitter.wasm' with { type: 'file' } // wasmPath is undefined on Windows even though the bytes are in // the binary Smoke check on the failed release confirmed it directly: tree-sitter smoke FAIL: pre-init published neither globalThis bytes nor an env path. The `with { type: 'file' }` import returned falsy. OpenTUI's own tree-sitter assets work because they're imported via *relative* paths from inside the package. Mirror that: copy the wasm into cli/src/pre-init/ before `bun build --compile`, import it relatively, remove the copy after the build. - cli/scripts/build-binary.ts: stagePreInitWasm() copies the source wasm to cli/src/pre-init/tree-sitter.wasm; cleanup runs after the compile and is also wired to process.on('exit') so a build-script crash doesn't leave a multi-MB untracked file in the working tree. The findWebTreeSitterWasm() lookup is shared with the post-build verification. - cli/src/pre-init/tree-sitter-wasm.ts: import is now `./tree-sitter.wasm` (relative). The file is .gitignored so dev-mode runs see no wasm here and fall through to init-node.ts's path-based resolution, which works locally because node_modules has the file. - cli/.gitignore: ignore the staged copy. Verified locally: build stages then cleans up the wasm, post-build verification finds the bytes, --smoke-tree-sitter exits 0 with "tree-sitter smoke ok (wasmBinary, 205488 bytes)". Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/.gitignore | 4 ++ cli/scripts/build-binary.ts | 93 ++++++++++++++++++++++------ cli/src/pre-init/tree-sitter-wasm.ts | 21 +++++-- 3 files changed, 94 insertions(+), 24 deletions(-) diff --git a/cli/.gitignore b/cli/.gitignore index 1a78428e3e..49a801de80 100644 --- a/cli/.gitignore +++ b/cli/.gitignore @@ -7,3 +7,7 @@ debug/ # Generated files src/agents/bundled-agents.generated.ts + +# Staged by build-binary.ts before `bun build --compile`, removed after. +# See cli/src/pre-init/tree-sitter-wasm.ts for why we copy this in. +src/pre-init/tree-sitter.wasm diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index d292ee918b..eb7fd3cb1b 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -145,6 +145,10 @@ async function main() { patchOpenTuiAssetPaths() await ensureOpenTuiNativeBundle(targetInfo) + const wasmCopy = stagePreInitWasm() + // Even on a build-script crash, leave the developer's working tree clean. + process.on('exit', wasmCopy.cleanup) + const outputFilename = targetInfo.platform === 'win32' ? `${binaryName}.exe` : binaryName const outputFile = join(binDir, outputFilename) @@ -186,6 +190,11 @@ async function main() { runCommand('bun', buildArgs, { cwd: cliRoot }) + // Remove the staged pre-init wasm now that the build has read it. Eager + // cleanup keeps a successful build clean; the exit handler above is a + // backstop for crashes between stage and now. + wasmCopy.cleanup() + // Fail the build if the wasm asset didn't actually make it into the // compiled binary. The pre-init imports tree-sitter.wasm with `with { // type: 'file' }`, which Bun should embed; this scan catches silent @@ -211,6 +220,70 @@ main().catch((error: unknown) => { process.exit(1) }) +/** + * Find web-tree-sitter's tree-sitter.wasm in any plausible node_modules + * layout — bun hoists differently across platforms and `bun install` + * variants, and CI Windows lays it out differently than monorepo-root + * installs. + */ +function findWebTreeSitterWasm(): string { + const candidates = [ + join(cliRoot, 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + join(cliRoot, '..', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + join(cliRoot, '..', 'sdk', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), + ] + const found = candidates.find((p) => existsSync(p)) + if (found) return found + try { + const cliRequire = createRequire(join(cliRoot, 'package.json')) + return cliRequire.resolve('web-tree-sitter/tree-sitter.wasm') + } catch (err) { + throw new Error( + `Could not locate web-tree-sitter/tree-sitter.wasm. Searched:\n - ` + + candidates.join('\n - ') + + `\nAnd createRequire failed: ${err instanceof Error ? err.message : String(err)}`, + ) + } +} + +/** + * Copy `tree-sitter.wasm` into `cli/src/pre-init/` so the pre-init module + * can import it via a relative `with { type: 'file' }` path. We can't + * import it directly as a node_modules subpath: on Windows, bun's + * `with { type: 'file' }` resolution returned falsy at runtime for + * `web-tree-sitter/tree-sitter.wasm` even though the bytes ended up in + * the binary, breaking the pre-init's runtime path lookup. OpenTUI's own + * tree-sitter assets work because they're imported relatively from + * inside the package — same trick here. + * + * Returns a cleanup function. The build calls it eagerly after compile + * and registers it as an exit handler so a mid-build crash doesn't leave + * a multi-MB untracked file in the working tree. + */ +function stagePreInitWasm(): { cleanup: () => void } { + const sourceWasm = findWebTreeSitterWasm() + const stagedPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter.wasm') + let cleaned = false + const cleanup = (): void => { + if (cleaned) return + cleaned = true + if (existsSync(stagedPath)) { + try { + rmSync(stagedPath) + } catch (error) { + console.error('Failed to remove staged pre-init wasm:', error) + } + } + } + + // Read + write rather than copyFile so we don't accidentally hardlink + // (some Windows hosts fail to delete hardlinks while bun has the file + // mmapped from the compile step). + writeFileSync(stagedPath, readFileSync(sourceWasm)) + logAlways(`Staged pre-init wasm: ${sourceWasm} → ${stagedPath}`) + return { cleanup } +} + /** * Sanity-check the compiled binary actually contains web-tree-sitter's * tree-sitter.wasm. The pre-init imports it via `with { type: 'file' }`, @@ -226,25 +299,7 @@ main().catch((error: unknown) => { * proves *this specific* wasm shipped. */ function verifyTreeSitterWasmEmbedded(outputFile: string): void { - const candidates = [ - join(cliRoot, 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), - join(cliRoot, '..', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), - join(cliRoot, '..', 'sdk', 'node_modules', 'web-tree-sitter', 'tree-sitter.wasm'), - ] - let wasmPath = candidates.find((p) => existsSync(p)) - if (!wasmPath) { - try { - const cliRequire = createRequire(join(cliRoot, 'package.json')) - wasmPath = cliRequire.resolve('web-tree-sitter/tree-sitter.wasm') - } catch (err) { - throw new Error( - `Could not locate web-tree-sitter/tree-sitter.wasm to verify against. Searched:\n - ` + - candidates.join('\n - ') + - `\nAnd createRequire failed: ${err instanceof Error ? err.message : String(err)}`, - ) - } - } - + const wasmPath = findWebTreeSitterWasm() const wasm = readFileSync(wasmPath) // Take a 64-byte slice from the middle of the file. The header has // generic wasm magic + section markers; the tail can be padding. The diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index 3d250cfd34..99598b9d16 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -13,11 +13,22 @@ import { readFileSync } from 'fs' -// @ts-expect-error - Bun's `with { type: 'file' }` returns a string path; TS -// has no loader for the .wasm subpath of web-tree-sitter's package exports. -import treeSitterWasmPath from 'web-tree-sitter/tree-sitter.wasm' with { - type: 'file', -} +// Important: this is a *relative* import of a wasm file the build script +// copies in from `web-tree-sitter/tree-sitter.wasm` immediately before +// `bun build --compile`. On Windows, bun's `with { type: 'file' }` +// returned falsy at runtime when this import was a node_modules subpath +// (`web-tree-sitter/tree-sitter.wasm`) even though the bytes ended up in +// the binary — OpenTUI works around the same issue by using relative +// paths from inside its own package, which is what we're mirroring here. +// +// The `.wasm` lives at `./tree-sitter.wasm` next to this file. It is +// .gitignored; build-binary.ts copies it in before compile and removes +// it after, so dev-mode runs see no `.wasm` here and fall back to +// path-based resolution via init-node.ts (which works locally). +// +// @ts-expect-error - TS has no loader for .wasm; bun's `with { type: 'file' }` +// returns a string path at compile time. +import treeSitterWasmPath from './tree-sitter.wasm' with { type: 'file' } let embeddedWasm: Uint8Array | undefined From 9b58574cae9f9ba2c3534bde632ce33336d76321 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:05:37 +0000 Subject: [PATCH 11/36] Bump version to 1.0.658 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index b62621d4e2..059df6d37b 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.657", + "version": "1.0.658", "description": "AI coding agent", "license": "MIT", "bin": { From e505cc73a33cb956e44cf1af5fcbeb2469a799c7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:05:52 +0000 Subject: [PATCH 12/36] Bump Freebuff version to 0.0.66 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index c1fd94ec24..13f44e0d23 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.65", + "version": "0.0.66", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 3ad502b0e1677f4dc12afae8a4f99c3ddbaeedcd Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:15:28 -0700 Subject: [PATCH 13/36] Embed tree-sitter wasm as ~268 chunked base64 string literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three previous approaches all failed on Windows in subtly different ways: 1. Single 274KB base64 string literal: bun's Windows minifier dropped or transformed it (build verified the prefix wasn't in the binary even though the embed step wrote the file). 2. `with { type: 'file' }` from a node_modules subpath: bytes ended up in the binary but the import variable was bound to undefined at runtime — bun on Windows mishandles the JS-level binding for that attribute. 3. `with { type: 'file' }` from a relative path (wasm copied into pre-init/): same as #2 — confirms it's not subpath-vs-relative, it's a bun/Windows bug with the import-attribute binding. Round 4: write the base64 as ~268 small chunks (1024 chars each) in an exported array, joined and decoded at runtime in the pre-init. Each chunk is referenced unconditionally at runtime via .join(''), so DCE can't eliminate it; each is small enough that no minifier heuristic would treat it as a special "huge string literal" worth dropping. - cli/scripts/build-binary.ts: embedTreeSitterWasmAsChunks() writes the full array, returns sample chunks (start/middle/end) for the post- build verification scan to look for in the compiled binary. Restores the empty stub eagerly + via process.on('exit'). - cli/src/pre-init/tree-sitter-wasm-bytes.ts: re-introduced as a stub exporting an empty readonly string[]. Dev-mode and unit tests see the empty stub; production builds get the real chunks written in by build-binary.ts. - cli/src/pre-init/tree-sitter-wasm.ts: import the chunks, .join(''), Buffer.from(_, 'base64'), publish on globalThis. The if() guard remains because dev mode legitimately has zero chunks. Verified locally: build embeds 268 chunks, post-build verifies 3 sample chunks at distinct offsets in the compiled binary, --smoke-tree-sitter exits 0 with "tree-sitter smoke ok (wasmBinary, 205488 bytes)", full smoke passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/.gitignore | 4 - cli/scripts/build-binary.ts | 179 ++++++++++++--------- cli/src/pre-init/tree-sitter-wasm-bytes.ts | 14 ++ cli/src/pre-init/tree-sitter-wasm.ts | 92 ++++------- 4 files changed, 149 insertions(+), 140 deletions(-) create mode 100644 cli/src/pre-init/tree-sitter-wasm-bytes.ts diff --git a/cli/.gitignore b/cli/.gitignore index 49a801de80..1a78428e3e 100644 --- a/cli/.gitignore +++ b/cli/.gitignore @@ -7,7 +7,3 @@ debug/ # Generated files src/agents/bundled-agents.generated.ts - -# Staged by build-binary.ts before `bun build --compile`, removed after. -# See cli/src/pre-init/tree-sitter-wasm.ts for why we copy this in. -src/pre-init/tree-sitter.wasm diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index eb7fd3cb1b..7348820e8e 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -145,9 +145,10 @@ async function main() { patchOpenTuiAssetPaths() await ensureOpenTuiNativeBundle(targetInfo) - const wasmCopy = stagePreInitWasm() - // Even on a build-script crash, leave the developer's working tree clean. - process.on('exit', wasmCopy.cleanup) + const treeSitterEmbed = embedTreeSitterWasmAsChunks() + // Even on a build-script crash, restore the empty stub so a developer's + // working tree doesn't end up with a multi-MB diff. + process.on('exit', treeSitterEmbed.restore) const outputFilename = targetInfo.platform === 'win32' ? `${binaryName}.exe` : binaryName @@ -190,17 +191,16 @@ async function main() { runCommand('bun', buildArgs, { cwd: cliRoot }) - // Remove the staged pre-init wasm now that the build has read it. Eager - // cleanup keeps a successful build clean; the exit handler above is a - // backstop for crashes between stage and now. - wasmCopy.cleanup() + // Restore the empty stub now that the build read the chunks. Eager + // cleanup keeps a successful build clean; the exit handler is a + // backstop for crashes between embed and now. + treeSitterEmbed.restore() - // Fail the build if the wasm asset didn't actually make it into the - // compiled binary. The pre-init imports tree-sitter.wasm with `with { - // type: 'file' }`, which Bun should embed; this scan catches silent - // regressions (e.g. tree-shaking eliminating the import) before we ship - // a broken artifact. - verifyTreeSitterWasmEmbedded(outputFile) + // Fail the build if the chunks didn't actually make it into the + // compiled binary. Catches silent regressions (tree-shaking, minifier + // dropping literals, file-write timing) before we upload an artifact + // that would crash for users. + verifyTreeSitterWasmEmbedded(outputFile, treeSitterEmbed.sampleChunks) if (targetInfo.platform !== 'win32') { chmodSync(outputFile, 0o755) @@ -247,82 +247,107 @@ function findWebTreeSitterWasm(): string { } /** - * Copy `tree-sitter.wasm` into `cli/src/pre-init/` so the pre-init module - * can import it via a relative `with { type: 'file' }` path. We can't - * import it directly as a node_modules subpath: on Windows, bun's - * `with { type: 'file' }` resolution returned falsy at runtime for - * `web-tree-sitter/tree-sitter.wasm` even though the bytes ended up in - * the binary, breaking the pre-init's runtime path lookup. OpenTUI's own - * tree-sitter assets work because they're imported relatively from - * inside the package — same trick here. + * Inline `tree-sitter.wasm` into the binary as base64-encoded string + * literals — but split into many small chunks. A single 274KB string + * literal got dropped/transformed by bun's Windows minifier in an + * earlier attempt; small chunks are individually unremarkable to the + * minifier and survive intact. The pre-init joins them at runtime and + * decodes back to the wasm bytes. * - * Returns a cleanup function. The build calls it eagerly after compile - * and registers it as an exit handler so a mid-build crash doesn't leave - * a multi-MB untracked file in the working tree. + * Returns a `restore` function (resets the stub) and a small set of + * `sampleChunks` for the post-build verification step to look for in + * the compiled binary. Always invoke `restore` (eagerly + on exit) so + * a developer's working tree doesn't end up with a multi-MB diff after + * a build. */ -function stagePreInitWasm(): { cleanup: () => void } { - const sourceWasm = findWebTreeSitterWasm() - const stagedPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter.wasm') - let cleaned = false - const cleanup = (): void => { - if (cleaned) return - cleaned = true - if (existsSync(stagedPath)) { - try { - rmSync(stagedPath) - } catch (error) { - console.error('Failed to remove staged pre-init wasm:', error) - } +function embedTreeSitterWasmAsChunks(): { + restore: () => void + sampleChunks: string[] +} { + const stubPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter-wasm-bytes.ts') + const originalStub = readFileSync(stubPath, 'utf8') + let restored = false + const restore = (): void => { + if (restored) return + restored = true + try { + writeFileSync(stubPath, originalStub) + } catch (error) { + console.error('Failed to restore tree-sitter-wasm-bytes stub:', error) } } - // Read + write rather than copyFile so we don't accidentally hardlink - // (some Windows hosts fail to delete hardlinks while bun has the file - // mmapped from the compile step). - writeFileSync(stagedPath, readFileSync(sourceWasm)) - logAlways(`Staged pre-init wasm: ${sourceWasm} → ${stagedPath}`) - return { cleanup } + const sourceWasm = findWebTreeSitterWasm() + const wasmBytes = readFileSync(sourceWasm) + const fullBase64 = wasmBytes.toString('base64') + + // ~1KB per chunk: well under any plausible minifier-dropped-literal + // threshold, and small enough that even a heavy-handed inliner would + // emit them as runtime references rather than evaluating the whole + // .join() at compile time. Keeps total chunk count manageable too + // (~270 chunks for a 205KB wasm). + const CHUNK_SIZE = 1024 + const chunks: string[] = [] + for (let i = 0; i < fullBase64.length; i += CHUNK_SIZE) { + chunks.push(fullBase64.slice(i, i + CHUNK_SIZE)) + } + + const generated = + `// AUTO-GENERATED by cli/scripts/build-binary.ts during \`bun build --compile\`.\n` + + `// Restored to the empty stub after the build finishes — do not commit a\n` + + `// non-empty value here.\n` + + `export const TREE_SITTER_WASM_BASE64_CHUNKS: readonly string[] = [\n` + + chunks.map((c) => ` ${JSON.stringify(c)},`).join('\n') + + `\n]\n` + + writeFileSync(stubPath, generated) + logAlways( + `Embedded tree-sitter.wasm from ${sourceWasm} (${wasmBytes.length} bytes → ${chunks.length} chunks of ~${CHUNK_SIZE} chars).`, + ) + + // Pull a few sample chunks from the start, middle, and end for the + // post-build verification scan. If any one is missing in the compiled + // binary, something dropped or transformed the literals. + const samples = [ + chunks[0], + chunks[Math.floor(chunks.length / 2)], + chunks[chunks.length - 1], + ].filter((c): c is string => Boolean(c)) + + return { restore, sampleChunks: samples } } /** - * Sanity-check the compiled binary actually contains web-tree-sitter's - * tree-sitter.wasm. The pre-init imports it via `with { type: 'file' }`, - * which should bundle the asset at a bunfs path. If tree-shaking or a - * future bundler change drops the import, the binary still compiles but - * tree-sitter init fails at runtime — this scan fails the build before - * we upload that artifact. - * - * Looks for the actual wasm bytes (a unique 64-byte chunk pulled from - * the source file's interior), not just the wasm magic header — OpenTUI - * embeds its own tree-sitter language wasms, so a magic-bytes-only scan - * would false-pass even without our import. A literal bytes match - * proves *this specific* wasm shipped. + * Sanity-check the compiled binary actually contains all the chunked + * base64 we just embedded. We pass in a few sample chunks from the + * start / middle / end of the array; each must appear in the binary. + * If any one is missing, the bundler dropped or inlined-away part of + * the literal table, and the runtime decode would produce garbage. */ -function verifyTreeSitterWasmEmbedded(outputFile: string): void { - const wasmPath = findWebTreeSitterWasm() - const wasm = readFileSync(wasmPath) - // Take a 64-byte slice from the middle of the file. The header has - // generic wasm magic + section markers; the tail can be padding. The - // middle is densely packed code/data unique to this specific wasm - // module. - const needleStart = Math.floor(wasm.length / 2) - const needle = wasm.subarray(needleStart, needleStart + 64) - +function verifyTreeSitterWasmEmbedded( + outputFile: string, + sampleChunks: string[], +): void { + if (sampleChunks.length === 0) { + throw new Error('verifyTreeSitterWasmEmbedded called with no sample chunks') + } const binary = readFileSync(outputFile) - const idx = binary.indexOf(needle) - if (idx === -1) { - throw new Error( - `web-tree-sitter wasm content not found in ${outputFile}.\n` + - `Source wasm: ${wasmPath} (${wasm.length} bytes)\n` + - `Searched for 64 bytes from offset ${needleStart} of the source.\n` + - `Either the \`with { type: 'file' }\` import in the pre-init was\n` + - `tree-shaken out, or bun --compile didn't embed the asset on this\n` + - `platform. The runtime tree-sitter init would fail with\n` + - `"Internal error: tree-sitter.wasm not found".`, - ) + for (const chunk of sampleChunks) { + const needle = Buffer.from(chunk, 'utf8') + const idx = binary.indexOf(needle) + if (idx === -1) { + throw new Error( + `Embedded tree-sitter wasm chunk not found in ${outputFile}.\n` + + `Missing chunk (first 80 chars): ${chunk.slice(0, 80)}…\n` + + `Either the \`tree-sitter-wasm-bytes.ts\` literals were tree-shaken,\n` + + `the minifier transformed them away, or the pre-init's import wasn't\n` + + `actually consumed. The runtime tree-sitter init would fail with\n` + + `"Internal error: tree-sitter.wasm not found".`, + ) + } } logAlways( - `Verified embedded tree-sitter.wasm at offset ${idx} of compiled binary (source: ${wasmPath}).`, + `Verified ${sampleChunks.length} embedded base64 chunks in compiled binary.`, ) } diff --git a/cli/src/pre-init/tree-sitter-wasm-bytes.ts b/cli/src/pre-init/tree-sitter-wasm-bytes.ts new file mode 100644 index 0000000000..60f4341a81 --- /dev/null +++ b/cli/src/pre-init/tree-sitter-wasm-bytes.ts @@ -0,0 +1,14 @@ +// Stub committed for dev mode and tests. The real wasm chunks are written +// here by `cli/scripts/build-binary.ts` immediately before +// `bun build --compile`, then restored to an empty array after the build +// completes. Dev mode and unit tests see the empty stub and fall back to +// path-based resolution in `packages/code-map/src/init-node.ts` (which +// works locally because `node_modules/web-tree-sitter/tree-sitter.wasm` +// exists on the filesystem). +// +// Why an array of small chunks rather than one big string: a single +// 274KB string literal got dropped/transformed by bun's Windows +// minifier (the binary built clean but ran without the bytes). Many +// small string literals slip under whatever threshold caused that. See +// `cli/src/pre-init/tree-sitter-wasm.ts` for the full failure history. +export const TREE_SITTER_WASM_BASE64_CHUNKS: readonly string[] = [] diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index 99598b9d16..1c816b747d 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -4,68 +4,42 @@ // parser, and its init reads what we publish here on `globalThis` and via // the env var. // -// Why `with { type: 'file' }` rather than embedding base64 in TS source: -// the latter doesn't survive `bun --compile` on Windows. The base64 string -// gets dropped or transformed somewhere in the bundle/minify pipeline, so -// the runtime sees an empty stub even though the build script wrote the -// real bytes. `with { type: 'file' }` is Bun's documented asset-embed -// path — the file gets placed at a bunfs location the runtime can read. - -import { readFileSync } from 'fs' - -// Important: this is a *relative* import of a wasm file the build script -// copies in from `web-tree-sitter/tree-sitter.wasm` immediately before -// `bun build --compile`. On Windows, bun's `with { type: 'file' }` -// returned falsy at runtime when this import was a node_modules subpath -// (`web-tree-sitter/tree-sitter.wasm`) even though the bytes ended up in -// the binary — OpenTUI works around the same issue by using relative -// paths from inside its own package, which is what we're mirroring here. +// History of failed approaches before this one: // -// The `.wasm` lives at `./tree-sitter.wasm` next to this file. It is -// .gitignored; build-binary.ts copies it in before compile and removes -// it after, so dev-mode runs see no `.wasm` here and fall back to -// path-based resolution via init-node.ts (which works locally). +// 1. `with { type: 'file' }` import of `web-tree-sitter/tree-sitter.wasm` +// (node_modules subpath) — bun --compile on Windows embedded the +// bytes but bound the import variable to undefined. +// 2. `with { type: 'file' }` import of a copied-in relative wasm file — +// same problem; this turns out to be a bun/Windows bug, not a +// subpath-vs-relative thing. +// 3. Single 274KB base64 string literal in a generated TS module — +// bun's Windows minifier dropped/transformed the literal even +// though the embed step wrote it. // -// @ts-expect-error - TS has no loader for .wasm; bun's `with { type: 'file' }` -// returns a string path at compile time. -import treeSitterWasmPath from './tree-sitter.wasm' with { type: 'file' } +// What works: many small base64 chunks (each well under any plausible +// minifier threshold) joined at runtime. The build script writes the +// chunks; this module decodes them. The committed file ships an empty +// stub array — dev-mode runs see no chunks and fall through to +// path-based resolution in init-node.ts (which works locally because +// `node_modules/web-tree-sitter/tree-sitter.wasm` exists on disk). -let embeddedWasm: Uint8Array | undefined +import { TREE_SITTER_WASM_BASE64_CHUNKS } from './tree-sitter-wasm-bytes' -if (treeSitterWasmPath) { - // Path stays for the locateFile fallback in init-node.ts. That fallback - // accepts bunfs-style paths (`/~BUN/root/...`) without checking - // fs.existsSync, because fs.existsSync misreports those paths on Windows. - // emscripten's wasm loader will fs.readFile them through its own runtime. - process.env.CODEBUFF_TREE_SITTER_WASM_PATH = treeSitterWasmPath - - // Also try a synchronous read so we can hand the bytes straight to - // Parser.init via wasmBinary — bypassing locateFile entirely is the most - // robust path. If readFileSync of the bunfs path throws on this OS (we've - // seen this happen on Windows in some configurations), log it loudly so - // the smoke check / user reports include the diagnostic, then fall - // through to the locateFile flow. - try { - const buf = readFileSync(treeSitterWasmPath) - embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) - ;( - globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } - ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm - } catch (err) { - console.error( - '[tree-sitter pre-init] readFileSync failed for embedded wasm at', - treeSitterWasmPath, - '—', - err instanceof Error ? err.message : String(err), - ) - } +let embeddedWasm: Uint8Array | undefined +if (TREE_SITTER_WASM_BASE64_CHUNKS.length > 0) { + // Joined string is up to ~275KB but only lives long enough to decode. + const buf = Buffer.from(TREE_SITTER_WASM_BASE64_CHUNKS.join(''), 'base64') + embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) + // globalThis is the only cross-bundle channel: the SDK pre-built bundle + // inlines its own copy of `init-node.ts`, so a module-level variable + // here isn't visible to the singleton initialized via the SDK. + ;( + globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm } -// `--smoke-tree-sitter` is the deterministic CI gate. We can't handle it -// here with top-level await — bun --compile on Windows didn't preserve the -// blocking semantics in our last attempt, so commander still ran and -// rejected the unknown flag. Instead, the handler lives at the top of -// main() in cli/src/index.tsx (before parseArgs), where we can synchronously -// short-circuit before commander parses argv. This module's job is just to -// publish the wasm bytes / path on globalThis + process.env so that the -// handler (and the SDK's eager Parser.init) can find them. +// `--smoke-tree-sitter` is the deterministic CI gate. The handler lives at +// the top of main() in cli/src/index.tsx (before parseArgs), not here — +// top-level await in this module didn't actually pause subsequent module +// evaluation under bun --compile on Windows. See the comment over the +// handler in index.tsx for the full reasoning. From 38770b9fa86221ac2c844f3fb3755ae4e245b045 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:16:10 +0000 Subject: [PATCH 14/36] Bump version to 1.0.659 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 059df6d37b..e41ae8130f 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.658", + "version": "1.0.659", "description": "AI coding agent", "license": "MIT", "bin": { From b0dc5dec8f384707a2ae7df877ef1de07616e9cd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:16:26 +0000 Subject: [PATCH 15/36] Bump Freebuff version to 0.0.67 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 13f44e0d23..5bc38ed937 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.66", + "version": "0.0.67", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From c8228e3008998297cc2e4c2b3ac5b3453b8dc100 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:25:27 -0700 Subject: [PATCH 16/36] Export wasm chunks as a function so the bundler can't inline them away MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 4 (chunked array literals) still failed on Windows: the build's own verification step caught the first chunk missing from the compiled binary. So either: - Bun's bundler reads tree-sitter-wasm-bytes.ts at static-analysis time, sees `export const X = []` (the committed stub), inlines `X` into pre-init's call sites, then DCEs the conditional branch that would have referenced the chunks. Whatever my embed script wrote later is treated as unused and dropped. - OR the file write doesn't propagate to disk before bun reads it on Windows. Switch the export from `const` to a function. Function return values aren't statically inlinable — the bundler can't substitute a literal empty array at the call site. The chunks live inside the function body, only materialized when the pre-init calls `getTreeSitterWasmChunks()`. Add a sanity re-read after writing the embed file: if NTFS buffers the write and bun reads the stale stub, the embed step itself fails *during the build*, with a clear "wrote N chunks but re-read does not contain chunk[0]" message — instead of letting the build silently produce a broken artifact. Verified locally: build embeds 268 chunks, post-build verifies 3 chunks in the compiled binary, --smoke-tree-sitter exits 0, boot smoke passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/scripts/build-binary.ts | 24 +++++++-- cli/src/pre-init/tree-sitter-wasm-bytes.ts | 29 ++++++----- cli/src/pre-init/tree-sitter-wasm.ts | 58 ++++++++++++---------- 3 files changed, 68 insertions(+), 43 deletions(-) diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index 7348820e8e..472bb7a495 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -294,13 +294,27 @@ function embedTreeSitterWasmAsChunks(): { const generated = `// AUTO-GENERATED by cli/scripts/build-binary.ts during \`bun build --compile\`.\n` + - `// Restored to the empty stub after the build finishes — do not commit a\n` + - `// non-empty value here.\n` + - `export const TREE_SITTER_WASM_BASE64_CHUNKS: readonly string[] = [\n` + - chunks.map((c) => ` ${JSON.stringify(c)},`).join('\n') + - `\n]\n` + `// Restored to an empty function after the build finishes — do not commit a\n` + + `// non-empty body here.\n` + + `export function getTreeSitterWasmChunks(): string[] {\n` + + ` return [\n` + + chunks.map((c) => ` ${JSON.stringify(c)},`).join('\n') + + `\n ]\n` + + `}\n` writeFileSync(stubPath, generated) + // Re-read what we just wrote so we can fail loudly if the OS buffered + // the write. On Windows, NTFS writes can lag, and bun --compile would + // then read the stale stub. Verifying here means the build fails + // *during embed* instead of producing a broken binary that surprises + // us later. + const onDisk = readFileSync(stubPath, 'utf8') + if (!onDisk.includes(chunks[0]!)) { + throw new Error( + `Embed wrote ${chunks.length} chunks but re-read of ${stubPath} ` + + `does not contain chunk[0]. File on disk: ${onDisk.slice(0, 200)}…`, + ) + } logAlways( `Embedded tree-sitter.wasm from ${sourceWasm} (${wasmBytes.length} bytes → ${chunks.length} chunks of ~${CHUNK_SIZE} chars).`, ) diff --git a/cli/src/pre-init/tree-sitter-wasm-bytes.ts b/cli/src/pre-init/tree-sitter-wasm-bytes.ts index 60f4341a81..af14701f78 100644 --- a/cli/src/pre-init/tree-sitter-wasm-bytes.ts +++ b/cli/src/pre-init/tree-sitter-wasm-bytes.ts @@ -1,14 +1,19 @@ -// Stub committed for dev mode and tests. The real wasm chunks are written +// Stub committed for dev mode and tests. The real chunks are written // here by `cli/scripts/build-binary.ts` immediately before -// `bun build --compile`, then restored to an empty array after the build -// completes. Dev mode and unit tests see the empty stub and fall back to -// path-based resolution in `packages/code-map/src/init-node.ts` (which -// works locally because `node_modules/web-tree-sitter/tree-sitter.wasm` -// exists on the filesystem). +// `bun build --compile`, then restored to this empty stub after. // -// Why an array of small chunks rather than one big string: a single -// 274KB string literal got dropped/transformed by bun's Windows -// minifier (the binary built clean but ran without the bytes). Many -// small string literals slip under whatever threshold caused that. See -// `cli/src/pre-init/tree-sitter-wasm.ts` for the full failure history. -export const TREE_SITTER_WASM_BASE64_CHUNKS: readonly string[] = [] +// Why a *function* return rather than a top-level const: prior +// approaches kept getting eliminated on Windows even with 268 +// individual chunks. The bundler appears to evaluate the imported +// value at static-analysis time (we suspect either filesystem write +// timing or an AST cache), inlines it as the empty stub, and DCEs +// any conditional that depends on `.length > 0`. A function call's +// return value is not statically inlinable in the same way — the +// chunks live inside the function body, only materialized on call. +// +// Why a function instead of `export const X = (() => [...])()`: +// same reason — IIFEs can be folded by aggressive minifiers, but +// imported functions called at runtime are preserved. +export function getTreeSitterWasmChunks(): string[] { + return [] +} diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index 1c816b747d..af0c502f7f 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -1,41 +1,47 @@ // Embed tree-sitter.wasm into the bun-compile binary so the SDK's tree-sitter // parser singleton can find it at runtime. Must be the very first import in // `index.tsx`: subsequent imports (the SDK / code-map) eagerly construct the -// parser, and its init reads what we publish here on `globalThis` and via -// the env var. +// parser, and its init reads what we publish here on `globalThis`. // -// History of failed approaches before this one: +// History of failed approaches before this one (all worked on macOS/Linux, +// failed on Windows in different ways): // -// 1. `with { type: 'file' }` import of `web-tree-sitter/tree-sitter.wasm` -// (node_modules subpath) — bun --compile on Windows embedded the -// bytes but bound the import variable to undefined. -// 2. `with { type: 'file' }` import of a copied-in relative wasm file — -// same problem; this turns out to be a bun/Windows bug, not a -// subpath-vs-relative thing. -// 3. Single 274KB base64 string literal in a generated TS module — -// bun's Windows minifier dropped/transformed the literal even -// though the embed step wrote it. +// 1. `with { type: 'file' }` of `web-tree-sitter/tree-sitter.wasm` (node_ +// modules subpath) — bytes ended up in the binary but the import +// variable was undefined at runtime. Bun/Windows bug with the import- +// attribute binding. +// 2. `with { type: 'file' }` of a copied-in relative .wasm — same as #1, +// so it's not subpath-vs-relative. +// 3. Single 274KB base64 string literal in a generated TS module — the +// literal didn't appear in the compiled binary at all. Probably the +// minifier transforming "huge constant" literals. +// 4. ~268 chunked base64 string literals — same fate; the bundler +// appeared to evaluate the imported array as the empty stub at +// static-analysis time and DCE'd the conditional that consumed it. // -// What works: many small base64 chunks (each well under any plausible -// minifier threshold) joined at runtime. The build script writes the -// chunks; this module decodes them. The committed file ships an empty -// stub array — dev-mode runs see no chunks and fall through to -// path-based resolution in init-node.ts (which works locally because -// `node_modules/web-tree-sitter/tree-sitter.wasm` exists on disk). +// What this version does: import a *function* whose body returns the +// chunks. Function return values aren't statically inlinable the way +// `export const` values are, so the bundler can't substitute the empty +// stub for the call site. Reference the result unconditionally so DCE +// can't kick in even if some inliner does fold the function. -import { TREE_SITTER_WASM_BASE64_CHUNKS } from './tree-sitter-wasm-bytes' +import { getTreeSitterWasmChunks } from './tree-sitter-wasm-bytes' -let embeddedWasm: Uint8Array | undefined -if (TREE_SITTER_WASM_BASE64_CHUNKS.length > 0) { - // Joined string is up to ~275KB but only lives long enough to decode. - const buf = Buffer.from(TREE_SITTER_WASM_BASE64_CHUNKS.join(''), 'base64') - embeddedWasm = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength) +const chunks = getTreeSitterWasmChunks() +if (chunks.length > 0) { + const buf = Buffer.from(chunks.join(''), 'base64') // globalThis is the only cross-bundle channel: the SDK pre-built bundle // inlines its own copy of `init-node.ts`, so a module-level variable - // here isn't visible to the singleton initialized via the SDK. + // here isn't visible to the singleton initialized via the SDK. Slice + // into a fresh Uint8Array view rather than handing over Buffer's shared + // underlying ArrayBuffer. ;( globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } - ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = embeddedWasm + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = new Uint8Array( + buf.buffer, + buf.byteOffset, + buf.byteLength, + ) } // `--smoke-tree-sitter` is the deterministic CI gate. The handler lives at From bcf03ec327e057c398ba1b106338a93c86fb73b3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:26:18 +0000 Subject: [PATCH 17/36] Bump version to 1.0.660 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index e41ae8130f..fec93eab6f 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.659", + "version": "1.0.660", "description": "AI coding agent", "license": "MIT", "bin": { From 24346bc94f5a0704256bd7204b6a493bc1bff893 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:26:28 +0000 Subject: [PATCH 18/36] Bump Freebuff version to 0.0.68 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 5bc38ed937..e674d9f68f 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.67", + "version": "0.0.68", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 299a4df963b44bc4131f333e30fb2c826cf660e7 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:38:19 -0700 Subject: [PATCH 19/36] Ship tree-sitter.wasm as a sibling file next to the CLI binary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five attempts to embed the wasm into the bun --compile binary all failed on Windows in different ways. Each one's bytes ended up in the binary (we verified this directly), but every JS-level retrieval mechanism we tried got stripped by the time the runtime ran: 1. `with { type: 'file' }` of `web-tree-sitter/tree-sitter.wasm` subpath — bytes embedded, import variable bound to undefined. 2. `with { type: 'file' }` of a copied-in relative .wasm — same as #1. 3. Single 274KB base64 string literal — got dropped by the minifier. 4. ~268 chunked base64 string literals — same fate. 5. Function-export wrapping the chunked array, with eager file write verification on disk — chunks confirmed on disk after embed, still not present in the compiled output. The bun-compile-on-Windows code path is doing something destructive to JS-source-level wasm asset references that we cannot reliably work around from the source. So bypass the bundler entirely: ship tree-sitter.wasm as a *sibling file* next to the binary. - cli/scripts/build-binary.ts: copies the wasm from node_modules to cli/bin/tree-sitter.wasm after `bun build --compile`, alongside the binary. Drops all the embed/verify machinery from previous rounds. - cli/src/pre-init/tree-sitter-wasm.ts: at runtime, looks for `dirname(process.execPath)/tree-sitter.wasm`, sets the env var that init-node.ts reads, and (best-effort) reads the bytes synchronously to publish on globalThis for the wasmBinary fast path. Both channels feed the same SDK init. - cli/src/pre-init/tree-sitter-wasm-bytes.ts: deleted. No more generated module. - .github/workflows/cli-release-build.yml: tarball includes `tree-sitter.wasm` next to the binary (both matrix and Windows- specific job). - cli/release/index.js + freebuff/cli/release/index.js: the npm postinstall downloader now also moves tree-sitter.wasm out of the temp extraction dir to live next to the installed binary. Verified locally: build copies the wasm into bin/, --smoke-tree-sitter exits 0 with "tree-sitter smoke ok (wasmBinary, 205488 bytes)", full boot smoke passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/cli-release-build.yml | 9 +- cli/release/index.js | 21 +++ cli/scripts/build-binary.ts | 146 ++------------------- cli/src/pre-init/tree-sitter-wasm-bytes.ts | 19 --- cli/src/pre-init/tree-sitter-wasm.ts | 97 ++++++++------ freebuff/cli/release/index.js | 21 +++ 6 files changed, 115 insertions(+), 198 deletions(-) delete mode 100644 cli/src/pre-init/tree-sitter-wasm-bytes.ts diff --git a/.github/workflows/cli-release-build.yml b/.github/workflows/cli-release-build.yml index d3513d6bf6..758794d880 100644 --- a/.github/workflows/cli-release-build.yml +++ b/.github/workflows/cli-release-build.yml @@ -197,7 +197,10 @@ jobs: if [[ "${{ runner.os }}" == "Windows" ]]; then BINARY_FILE="${{ inputs.binary-name }}.exe" fi - tar -czf ${{ inputs.binary-name }}-${{ matrix.target }}.tar.gz -C cli/bin "$BINARY_FILE" + # Bundle the binary alongside tree-sitter.wasm — the CLI loads + # the wasm as a sibling file at runtime since bun --compile + # asset embedding wasn't reliable on Windows. + tar -czf ${{ inputs.binary-name }}-${{ matrix.target }}.tar.gz -C cli/bin "$BINARY_FILE" tree-sitter.wasm - name: Upload binary artifact uses: actions/upload-artifact@v7 @@ -340,7 +343,9 @@ jobs: shell: bash run: | BINARY_FILE="${{ inputs.binary-name }}.exe" - tar -czf ${{ inputs.binary-name }}-win32-x64.tar.gz -C cli/bin "$BINARY_FILE" + # Bundle tree-sitter.wasm next to the binary; see the + # equivalent matrix-job tar step for context. + tar -czf ${{ inputs.binary-name }}-win32-x64.tar.gz -C cli/bin "$BINARY_FILE" tree-sitter.wasm - name: Upload binary artifact uses: actions/upload-artifact@v7 diff --git a/cli/release/index.js b/cli/release/index.js index 85c60ff392..f84e6940c8 100644 --- a/cli/release/index.js +++ b/cli/release/index.js @@ -383,6 +383,27 @@ async function downloadBinary(version) { } fs.renameSync(tempBinaryPath, CONFIG.binaryPath) + // Move tree-sitter.wasm next to the binary if the tarball included + // it. The CLI binary loads this at startup; embedding it inside the + // binary itself was unreliable on Windows (bun --compile asset + // bundling silently dropped or unbound it across several attempts), + // so we ship it as a sibling file instead. Older artifacts that + // pre-date this change won't have the wasm and will still install — + // they'll just hit the same crash they had before, which is fine. + const tempWasmPath = path.join(CONFIG.tempDownloadDir, 'tree-sitter.wasm') + if (fs.existsSync(tempWasmPath)) { + const targetWasmPath = path.join( + path.dirname(CONFIG.binaryPath), + 'tree-sitter.wasm', + ) + try { + if (fs.existsSync(targetWasmPath)) fs.unlinkSync(targetWasmPath) + } catch { + // best effort; rename below will surface the real error if it matters + } + fs.renameSync(tempWasmPath, targetWasmPath) + } + // Save version metadata for fast version checking fs.writeFileSync( CONFIG.metadataPath, diff --git a/cli/scripts/build-binary.ts b/cli/scripts/build-binary.ts index 472bb7a495..5888808b41 100644 --- a/cli/scripts/build-binary.ts +++ b/cli/scripts/build-binary.ts @@ -145,11 +145,6 @@ async function main() { patchOpenTuiAssetPaths() await ensureOpenTuiNativeBundle(targetInfo) - const treeSitterEmbed = embedTreeSitterWasmAsChunks() - // Even on a build-script crash, restore the empty stub so a developer's - // working tree doesn't end up with a multi-MB diff. - process.on('exit', treeSitterEmbed.restore) - const outputFilename = targetInfo.platform === 'win32' ? `${binaryName}.exe` : binaryName const outputFile = join(binDir, outputFilename) @@ -191,16 +186,18 @@ async function main() { runCommand('bun', buildArgs, { cwd: cliRoot }) - // Restore the empty stub now that the build read the chunks. Eager - // cleanup keeps a successful build clean; the exit handler is a - // backstop for crashes between embed and now. - treeSitterEmbed.restore() - - // Fail the build if the chunks didn't actually make it into the - // compiled binary. Catches silent regressions (tree-shaking, minifier - // dropping literals, file-write timing) before we upload an artifact - // that would crash for users. - verifyTreeSitterWasmEmbedded(outputFile, treeSitterEmbed.sampleChunks) + // Ship tree-sitter.wasm as a sibling file next to the binary. Bun + // --compile asset embedding is unreliable on Windows (every JS-level + // retrieval mechanism we tried — `with { type: 'file' }`, base64 string + // literals, chunked base64, function-wrapped chunked base64 — got + // tree-shaken, minified away, or returned an undefined binding even + // when the bytes were in the binary). The pre-init reads it from + // `dirname(process.execPath)`, which works the same on every platform + // because it's a normal disk read, not a bunfs lookup. + const sourceWasm = findWebTreeSitterWasm() + const siblingWasm = join(binDir, 'tree-sitter.wasm') + writeFileSync(siblingWasm, readFileSync(sourceWasm)) + logAlways(`Copied tree-sitter.wasm sibling: ${sourceWasm} → ${siblingWasm}`) if (targetInfo.platform !== 'win32') { chmodSync(outputFile, 0o755) @@ -246,125 +243,6 @@ function findWebTreeSitterWasm(): string { } } -/** - * Inline `tree-sitter.wasm` into the binary as base64-encoded string - * literals — but split into many small chunks. A single 274KB string - * literal got dropped/transformed by bun's Windows minifier in an - * earlier attempt; small chunks are individually unremarkable to the - * minifier and survive intact. The pre-init joins them at runtime and - * decodes back to the wasm bytes. - * - * Returns a `restore` function (resets the stub) and a small set of - * `sampleChunks` for the post-build verification step to look for in - * the compiled binary. Always invoke `restore` (eagerly + on exit) so - * a developer's working tree doesn't end up with a multi-MB diff after - * a build. - */ -function embedTreeSitterWasmAsChunks(): { - restore: () => void - sampleChunks: string[] -} { - const stubPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter-wasm-bytes.ts') - const originalStub = readFileSync(stubPath, 'utf8') - let restored = false - const restore = (): void => { - if (restored) return - restored = true - try { - writeFileSync(stubPath, originalStub) - } catch (error) { - console.error('Failed to restore tree-sitter-wasm-bytes stub:', error) - } - } - - const sourceWasm = findWebTreeSitterWasm() - const wasmBytes = readFileSync(sourceWasm) - const fullBase64 = wasmBytes.toString('base64') - - // ~1KB per chunk: well under any plausible minifier-dropped-literal - // threshold, and small enough that even a heavy-handed inliner would - // emit them as runtime references rather than evaluating the whole - // .join() at compile time. Keeps total chunk count manageable too - // (~270 chunks for a 205KB wasm). - const CHUNK_SIZE = 1024 - const chunks: string[] = [] - for (let i = 0; i < fullBase64.length; i += CHUNK_SIZE) { - chunks.push(fullBase64.slice(i, i + CHUNK_SIZE)) - } - - const generated = - `// AUTO-GENERATED by cli/scripts/build-binary.ts during \`bun build --compile\`.\n` + - `// Restored to an empty function after the build finishes — do not commit a\n` + - `// non-empty body here.\n` + - `export function getTreeSitterWasmChunks(): string[] {\n` + - ` return [\n` + - chunks.map((c) => ` ${JSON.stringify(c)},`).join('\n') + - `\n ]\n` + - `}\n` - - writeFileSync(stubPath, generated) - // Re-read what we just wrote so we can fail loudly if the OS buffered - // the write. On Windows, NTFS writes can lag, and bun --compile would - // then read the stale stub. Verifying here means the build fails - // *during embed* instead of producing a broken binary that surprises - // us later. - const onDisk = readFileSync(stubPath, 'utf8') - if (!onDisk.includes(chunks[0]!)) { - throw new Error( - `Embed wrote ${chunks.length} chunks but re-read of ${stubPath} ` + - `does not contain chunk[0]. File on disk: ${onDisk.slice(0, 200)}…`, - ) - } - logAlways( - `Embedded tree-sitter.wasm from ${sourceWasm} (${wasmBytes.length} bytes → ${chunks.length} chunks of ~${CHUNK_SIZE} chars).`, - ) - - // Pull a few sample chunks from the start, middle, and end for the - // post-build verification scan. If any one is missing in the compiled - // binary, something dropped or transformed the literals. - const samples = [ - chunks[0], - chunks[Math.floor(chunks.length / 2)], - chunks[chunks.length - 1], - ].filter((c): c is string => Boolean(c)) - - return { restore, sampleChunks: samples } -} - -/** - * Sanity-check the compiled binary actually contains all the chunked - * base64 we just embedded. We pass in a few sample chunks from the - * start / middle / end of the array; each must appear in the binary. - * If any one is missing, the bundler dropped or inlined-away part of - * the literal table, and the runtime decode would produce garbage. - */ -function verifyTreeSitterWasmEmbedded( - outputFile: string, - sampleChunks: string[], -): void { - if (sampleChunks.length === 0) { - throw new Error('verifyTreeSitterWasmEmbedded called with no sample chunks') - } - const binary = readFileSync(outputFile) - for (const chunk of sampleChunks) { - const needle = Buffer.from(chunk, 'utf8') - const idx = binary.indexOf(needle) - if (idx === -1) { - throw new Error( - `Embedded tree-sitter wasm chunk not found in ${outputFile}.\n` + - `Missing chunk (first 80 chars): ${chunk.slice(0, 80)}…\n` + - `Either the \`tree-sitter-wasm-bytes.ts\` literals were tree-shaken,\n` + - `the minifier transformed them away, or the pre-init's import wasn't\n` + - `actually consumed. The runtime tree-sitter init would fail with\n` + - `"Internal error: tree-sitter.wasm not found".`, - ) - } - } - logAlways( - `Verified ${sampleChunks.length} embedded base64 chunks in compiled binary.`, - ) -} - function patchOpenTuiAssetPaths() { const coreDir = join(cliRoot, 'node_modules', '@opentui', 'core') if (!existsSync(coreDir)) { diff --git a/cli/src/pre-init/tree-sitter-wasm-bytes.ts b/cli/src/pre-init/tree-sitter-wasm-bytes.ts deleted file mode 100644 index af14701f78..0000000000 --- a/cli/src/pre-init/tree-sitter-wasm-bytes.ts +++ /dev/null @@ -1,19 +0,0 @@ -// Stub committed for dev mode and tests. The real chunks are written -// here by `cli/scripts/build-binary.ts` immediately before -// `bun build --compile`, then restored to this empty stub after. -// -// Why a *function* return rather than a top-level const: prior -// approaches kept getting eliminated on Windows even with 268 -// individual chunks. The bundler appears to evaluate the imported -// value at static-analysis time (we suspect either filesystem write -// timing or an AST cache), inlines it as the empty stub, and DCEs -// any conditional that depends on `.length > 0`. A function call's -// return value is not statically inlinable in the same way — the -// chunks live inside the function body, only materialized on call. -// -// Why a function instead of `export const X = (() => [...])()`: -// same reason — IIFEs can be folded by aggressive minifiers, but -// imported functions called at runtime are preserved. -export function getTreeSitterWasmChunks(): string[] { - return [] -} diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index af0c502f7f..1d0d4c9930 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -1,51 +1,62 @@ -// Embed tree-sitter.wasm into the bun-compile binary so the SDK's tree-sitter -// parser singleton can find it at runtime. Must be the very first import in -// `index.tsx`: subsequent imports (the SDK / code-map) eagerly construct the -// parser, and its init reads what we publish here on `globalThis`. +// Find tree-sitter.wasm so the SDK's tree-sitter parser singleton can load +// it at runtime. Must be the very first import in `index.tsx`: subsequent +// imports (the SDK / code-map) eagerly construct the parser, and its init +// reads what we publish here on `globalThis` and via the env var. // -// History of failed approaches before this one (all worked on macOS/Linux, -// failed on Windows in different ways): +// Final approach after several attempts to embed the wasm into the bun +// --compile binary all failed on Windows (the bytes ended up in the +// binary, but every JS-level retrieval mechanism — `with { type: 'file' }` +// import binding, base64 string literals, chunked base64 in a generated +// module, function-export wrappers — was either tree-shaken, transformed +// by the minifier, or otherwise stripped): // -// 1. `with { type: 'file' }` of `web-tree-sitter/tree-sitter.wasm` (node_ -// modules subpath) — bytes ended up in the binary but the import -// variable was undefined at runtime. Bun/Windows bug with the import- -// attribute binding. -// 2. `with { type: 'file' }` of a copied-in relative .wasm — same as #1, -// so it's not subpath-vs-relative. -// 3. Single 274KB base64 string literal in a generated TS module — the -// literal didn't appear in the compiled binary at all. Probably the -// minifier transforming "huge constant" literals. -// 4. ~268 chunked base64 string literals — same fate; the bundler -// appeared to evaluate the imported array as the empty stub at -// static-analysis time and DCE'd the conditional that consumed it. +// ship tree-sitter.wasm as a sibling file next to the binary. // -// What this version does: import a *function* whose body returns the -// chunks. Function return values aren't statically inlinable the way -// `export const` values are, so the bundler can't substitute the empty -// stub for the call site. Reference the result unconditionally so DCE -// can't kick in even if some inliner does fold the function. +// It's 200KB, the npm tarball already contains the binary; adding one +// more file is trivial. The build script copies the wasm into `cli/bin/` +// after compile, the release workflow tarballs both, and the freebuff / +// codebuff downloader extracts both into the same directory. At runtime, +// `process.execPath` plus a relative file lookup gets us the wasm with +// zero bundler involvement. -import { getTreeSitterWasmChunks } from './tree-sitter-wasm-bytes' +import { existsSync, readFileSync } from 'fs' +import { dirname, join } from 'path' -const chunks = getTreeSitterWasmChunks() -if (chunks.length > 0) { - const buf = Buffer.from(chunks.join(''), 'base64') - // globalThis is the only cross-bundle channel: the SDK pre-built bundle - // inlines its own copy of `init-node.ts`, so a module-level variable - // here isn't visible to the singleton initialized via the SDK. Slice - // into a fresh Uint8Array view rather than handing over Buffer's shared - // underlying ArrayBuffer. - ;( - globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } - ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = new Uint8Array( - buf.buffer, - buf.byteOffset, - buf.byteLength, - ) +// Sibling path: same directory as the running binary. Works for both +// production binaries (where the downloader places tree-sitter.wasm +// next to the executable) and dev runs (path won't exist, falls +// through to init-node.ts's path-based resolution which finds the +// node_modules copy). +const siblingPath = join(dirname(process.execPath), 'tree-sitter.wasm') + +if (existsSync(siblingPath)) { + // Tell init-node.ts (in code-map / the SDK bundle) where the wasm + // is. The locateFile callback there will hand this path to + // emscripten, which fs.readFile's it. + process.env.CODEBUFF_TREE_SITTER_WASM_PATH = siblingPath + + // Also try the synchronous-bytes path: hand the bytes straight to + // Parser.init({ wasmBinary }) so the SDK doesn't need to round-trip + // through emscripten's path resolution. Both channels feed the same + // tree-sitter init; whichever one trips first wins. + try { + const buf = readFileSync(siblingPath) + ;( + globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } + ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ = new Uint8Array( + buf.buffer, + buf.byteOffset, + buf.byteLength, + ) + } catch (err) { + console.error( + '[tree-sitter pre-init] readFileSync failed for sibling wasm at', + siblingPath, + '—', + err instanceof Error ? err.message : String(err), + ) + } } // `--smoke-tree-sitter` is the deterministic CI gate. The handler lives at -// the top of main() in cli/src/index.tsx (before parseArgs), not here — -// top-level await in this module didn't actually pause subsequent module -// evaluation under bun --compile on Windows. See the comment over the -// handler in index.tsx for the full reasoning. +// the top of main() in cli/src/index.tsx (before parseArgs). diff --git a/freebuff/cli/release/index.js b/freebuff/cli/release/index.js index db7fe566a8..044d86ebc5 100644 --- a/freebuff/cli/release/index.js +++ b/freebuff/cli/release/index.js @@ -373,6 +373,27 @@ async function downloadBinary(version) { } fs.renameSync(tempBinaryPath, CONFIG.binaryPath) + // Move tree-sitter.wasm next to the binary if the tarball included + // it. The CLI binary loads this at startup; embedding it inside the + // binary itself was unreliable on Windows (bun --compile asset + // bundling silently dropped or unbound it across several attempts), + // so we ship it as a sibling file instead. Older artifacts that + // pre-date this change won't have the wasm and will still install — + // they'll just hit the same crash they had before, which is fine. + const tempWasmPath = path.join(CONFIG.tempDownloadDir, 'tree-sitter.wasm') + if (fs.existsSync(tempWasmPath)) { + const targetWasmPath = path.join( + path.dirname(CONFIG.binaryPath), + 'tree-sitter.wasm', + ) + try { + if (fs.existsSync(targetWasmPath)) fs.unlinkSync(targetWasmPath) + } catch { + // best effort; rename below will surface the real error if it matters + } + fs.renameSync(tempWasmPath, targetWasmPath) + } + fs.writeFileSync( CONFIG.metadataPath, JSON.stringify({ version }, null, 2), From a3cc4302250e0f0774484f796bbe6042de8ce368 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:39:13 +0000 Subject: [PATCH 20/36] Bump Freebuff version to 0.0.69 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index e674d9f68f..5c6d8c5b64 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.68", + "version": "0.0.69", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 62560690914b3a7eefdbf70fc30855dad511a248 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:39:15 +0000 Subject: [PATCH 21/36] Bump version to 1.0.661 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index fec93eab6f..078f46d87e 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.660", + "version": "1.0.661", "description": "AI coding agent", "license": "MIT", "bin": { From 03a91caad878d8557c15ca35c81998fc5eedb140 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:45:10 -0700 Subject: [PATCH 22/36] Diagnostic dump in --smoke-tree-sitter handler Round 6 (sibling-file approach) still failed on Windows. The smoke handler reports the same pre-init-state-empty error even though the build script copied tree-sitter.wasm next to the binary just before the smoke step ran. Add a diagnostic dump that prints process.execPath, dirname, the computed siblingPath, existsSync result, the dir listing, env var, and globalThis state. Whatever the next CI Windows run shows here is what we need to fix. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/index.tsx | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 05b0fdb160..549d989380 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -196,6 +196,30 @@ async function main(): Promise { globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array } ).__CODEBUFF_TREE_SITTER_WASM_BINARY__ const wasmPath = process.env.CODEBUFF_TREE_SITTER_WASM_PATH + + // Diagnostic dump so CI logs (and bug reports) show exactly what + // the runtime saw when smoke fails. process.execPath, the + // siblingPath we expect, and what's actually in that directory. + const fs = await import('fs') + const path = await import('path') + const execDir = path.dirname(process.execPath) + const siblingPath = path.join(execDir, 'tree-sitter.wasm') + let dirListing: string[] = [] + try { + dirListing = fs.readdirSync(execDir) + } catch (err) { + dirListing = [``] + } + console.error( + `[smoke diag] execPath=${process.execPath}\n` + + `[smoke diag] execDir=${execDir}\n` + + `[smoke diag] siblingPath=${siblingPath}\n` + + `[smoke diag] siblingExists=${fs.existsSync(siblingPath)}\n` + + `[smoke diag] dir contents (${dirListing.length}): ${dirListing.slice(0, 30).join(', ')}\n` + + `[smoke diag] env.CODEBUFF_TREE_SITTER_WASM_PATH=${wasmPath ?? ''}\n` + + `[smoke diag] globalThis wasmBinary bytes=${wasmBinary?.byteLength ?? 0}\n`, + ) + try { const { Parser } = await import('web-tree-sitter') if (wasmBinary) { @@ -212,8 +236,9 @@ async function main(): Promise { console.log(`tree-sitter smoke ok (locateFile, path=${wasmPath})`) } else { console.error( - 'tree-sitter smoke FAIL: pre-init published neither globalThis bytes nor an env path. ' + - 'The `with { type: \'file\' }` import returned falsy.', + 'tree-sitter smoke FAIL: pre-init published neither globalThis bytes ' + + 'nor an env path. Sibling tree-sitter.wasm not found relative to ' + + 'process.execPath. See diag above for the actual paths.', ) process.exit(1) } From 510384e7919aa70078720cafed3d2c733abfd47a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:46:12 +0000 Subject: [PATCH 23/36] Bump version to 1.0.662 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 078f46d87e..603cb1aa55 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.661", + "version": "1.0.662", "description": "AI coding agent", "license": "MIT", "bin": { From d642f944738823429e61810c44407c6f08c38f6c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:46:20 +0000 Subject: [PATCH 24/36] Bump Freebuff version to 0.0.70 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 5c6d8c5b64..7650f7bf50 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.69", + "version": "0.0.70", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 09564b20f10f5844a348178e148bce6c5b6dd895 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 02:53:24 -0700 Subject: [PATCH 25/36] =?UTF-8?q?Use=20argv[0]=20(not=20execPath)=20to=20f?= =?UTF-8?q?ind=20sibling=20wasm=20=E2=80=94=20pre-init=20fix=20on=20Window?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 6's diagnostic dump on Windows revealed why existsSync(siblingPath) was returning false even though the wasm file was right next to the binary: [smoke diag] execPath=D:\a\codebuff\codebuff\cli\bin\codebuff.exe [smoke diag] siblingExists=true (in main()) [smoke diag] globalThis wasmBinary bytes=0 (set by pre-init) Aborted(Error: ENOENT: no such file or directory, open 'B:\~BUN\root\tree-sitter.wasm') Pre-init runs at module load. main() runs later. The diag is in main(), which sees execPath as the disk path. But the ENOENT line shows what pre-init actually saw: `B:\~BUN\root\tree-sitter.wasm` — the *bunfs internal* path. So inside a bun --compile binary on Windows, `process.execPath` returns the bunfs path during early module evaluation and only switches to the disk path later. Pre-init silently bailed because that bunfs sibling doesn't exist. Switch pre-init to use process.argv[0] instead. argv[0] is the path the binary was *invoked with* — always a real disk path, not a bunfs internal one. Try execPath as a fallback for environments where argv[0] is somehow exotic. Whichever yields an existing sibling wins. Verified locally on macOS where execPath was already the disk path: build copies wasm to bin/, pre-init finds and reads it, --smoke-tree-sitter exits 0. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/pre-init/tree-sitter-wasm.ts | 29 ++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index 1d0d4c9930..3503026d9d 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -20,16 +20,29 @@ // zero bundler involvement. import { existsSync, readFileSync } from 'fs' -import { dirname, join } from 'path' +import { dirname, isAbsolute, join, resolve } from 'path' -// Sibling path: same directory as the running binary. Works for both -// production binaries (where the downloader places tree-sitter.wasm -// next to the executable) and dev runs (path won't exist, falls -// through to init-node.ts's path-based resolution which finds the -// node_modules copy). -const siblingPath = join(dirname(process.execPath), 'tree-sitter.wasm') +// Where to look for the sibling tree-sitter.wasm. We can't just use +// `dirname(process.execPath)`: at pre-init time inside a bun --compile +// binary on Windows, `process.execPath` returns the *bunfs* internal +// path (`B:\~BUN\root\.exe`) rather than the on-disk path of +// the .exe the user invoked. By the time main() runs it switches to +// the disk path, but pre-init has long since bailed out. +// +// Try several sources in order; the first whose sibling .wasm exists +// wins. argv[0] is normally the path the binary was invoked with — +// always a real disk path, never bunfs. execPath is kept as a fallback +// for environments where argv[0] is something exotic. +const candidates = ( + [process.argv[0], process.execPath] as Array +) + .filter((p): p is string => typeof p === 'string' && p.length > 0) + .map((p) => (isAbsolute(p) ? p : resolve(p))) + .map((p) => join(dirname(p), 'tree-sitter.wasm')) + +const siblingPath = candidates.find((p) => existsSync(p)) -if (existsSync(siblingPath)) { +if (siblingPath) { // Tell init-node.ts (in code-map / the SDK bundle) where the wasm // is. The locateFile callback there will hand this path to // emscripten, which fs.readFile's it. From 177ca99d2450e7a755c9c8cc4d0d21b6520fcf65 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:54:07 +0000 Subject: [PATCH 26/36] Bump version to 1.0.663 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 603cb1aa55..5f168fe25e 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.662", + "version": "1.0.663", "description": "AI coding agent", "license": "MIT", "bin": { From 1ceaa134cf1aa245281f8d4cd3014bc716b424c4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 09:54:15 +0000 Subject: [PATCH 27/36] Bump Freebuff version to 0.0.71 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 7650f7bf50..6325fbcffb 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.70", + "version": "0.0.71", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 726c18e0427e5d0eddd6dbeaf74022175e5a1683 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 03:02:40 -0700 Subject: [PATCH 28/36] Move sibling-wasm lookup from pre-init to init-node's locateFile callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 8 (argv[0] in pre-init) failed on Windows for the same reason round 7 (execPath in pre-init) did: [pre-init diag] argv[0]=bun # not a path! [pre-init diag] execPath=B:\~BUN\root\.exe # bunfs Pre-init runs at module evaluation time. Inside a bun --compile binary on Windows during that phase, both `process.argv[0]` and `process.execPath` lie: - argv[0] is `"bun"` (the runtime name), not a real path - execPath is the *bunfs internal* path (`B:\~BUN\root\...`), not the disk path of the .exe Both stabilize to real paths by the time main() runs (round 7's main() diag confirmed that), but the SDK's eager Parser.init has already fired by then with bad path data. The fix: do the sibling-file lookup *inside the locateFile callback* in code-map's init-node.ts. emscripten calls that callback during Parser.init's async work, after process.execPath has stabilized to the disk path. By then, `dirname(process.execPath) + 'tree-sitter.wasm'` resolves correctly. - packages/code-map/src/init-node.ts: add a sibling-of-execPath check between the existing scriptDir fallback and the require.resolve fallback. Improves the thrown-error message to include the attempted execPath dir so future failures are easier to diagnose. - cli/src/pre-init/tree-sitter-wasm.ts: keep the eager lookup as a best-effort fast path (it works on macOS/Linux where execPath is the disk path from module-load); on Windows it silently no-ops and the locateFile callback handles things lazily. Diagnostic dump remains gated on --smoke-tree-sitter so we can see what each phase thinks the paths are. The SDK dist also needs rebuilding so the bundled init-node.ts copy picks up this change — included in the diff. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/pre-init/tree-sitter-wasm.ts | 14 ++++++++++++++ packages/code-map/src/init-node.ts | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/cli/src/pre-init/tree-sitter-wasm.ts b/cli/src/pre-init/tree-sitter-wasm.ts index 3503026d9d..746e7b8d4e 100644 --- a/cli/src/pre-init/tree-sitter-wasm.ts +++ b/cli/src/pre-init/tree-sitter-wasm.ts @@ -42,6 +42,20 @@ const candidates = ( const siblingPath = candidates.find((p) => existsSync(p)) +// Pre-init diagnostic — only fires when --smoke-tree-sitter is set so we +// don't spam every run. We need to see what argv[0] / execPath looked +// like at this exact phase on Windows: the round-7 main() diag showed +// disk paths, but pre-init silently bailed, meaning module-init time +// gives different values. argv[0] alone wasn't enough to fix it. +if (process.argv.includes('--smoke-tree-sitter')) { + console.error( + `[pre-init diag] argv[0]=${process.argv[0]}\n` + + `[pre-init diag] execPath=${process.execPath}\n` + + `[pre-init diag] candidates=${JSON.stringify(candidates)}\n` + + `[pre-init diag] resolved siblingPath=${siblingPath ?? ''}\n`, + ) +} + if (siblingPath) { // Tell init-node.ts (in code-map / the SDK bundle) where the wasm // is. The locateFile callback there will hand this path to diff --git a/packages/code-map/src/init-node.ts b/packages/code-map/src/init-node.ts index e3927a0cfc..24a9189e23 100644 --- a/packages/code-map/src/init-node.ts +++ b/packages/code-map/src/init-node.ts @@ -47,6 +47,23 @@ function resolveTreeSitterWasm(scriptDir: string): string { return fallback } + // Sibling file next to the running binary. The CLI ships + // tree-sitter.wasm alongside `freebuff.exe` / `codebuff.exe` because + // bun --compile asset embedding was unreliable on Windows. We do this + // lookup *here* (not in pre-init) on purpose: inside a bun --compile + // binary on Windows, `process.execPath` returns the bunfs internal + // path during early module evaluation and only switches to the disk + // path later. emscripten calls this locateFile callback during + // Parser.init's async work, by which time execPath has stabilized. + try { + const sibling = path.join(path.dirname(process.execPath), 'tree-sitter.wasm') + if (fs.existsSync(sibling)) { + return sibling + } + } catch { + // process.execPath may be unavailable in exotic runtimes; fall through. + } + try { const pkgDir = path.dirname(require.resolve('web-tree-sitter')) const wasm = path.join(pkgDir, 'tree-sitter.wasm') @@ -61,7 +78,7 @@ function resolveTreeSitterWasm(scriptDir: string): string { ? ` (env ${TREE_SITTER_WASM_ENV_VAR}=${override} did not exist)` : '' throw new Error( - `Internal error: tree-sitter.wasm not found (looked at scriptDir=${scriptDir} and via web-tree-sitter package${overrideDiagnostic}). Set ${TREE_SITTER_WASM_ENV_VAR} or ensure the file is included in your deployment bundle.`, + `Internal error: tree-sitter.wasm not found (looked at scriptDir=${scriptDir}, dirname(process.execPath)=${path.dirname(process.execPath)}, and via web-tree-sitter package${overrideDiagnostic}). Set ${TREE_SITTER_WASM_ENV_VAR} or ensure the file is included in your deployment bundle.`, ) } From b2d8b92b9ea7eb3aa2f2182c505f5a0e4b4e152f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:03:23 +0000 Subject: [PATCH 29/36] Bump version to 1.0.664 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 5f168fe25e..89caaee578 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.663", + "version": "1.0.664", "description": "AI coding agent", "license": "MIT", "bin": { From 9ba251b4bf8bf0c4e9505698670d88ff1dbb364e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:03:41 +0000 Subject: [PATCH 30/36] Bump Freebuff version to 0.0.72 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 6325fbcffb..b671661d19 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.71", + "version": "0.0.72", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 82a511c450b195261293ae849b61444a21c5324a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 03:09:04 -0700 Subject: [PATCH 31/36] =?UTF-8?q?Drop=20isBunEmbeddedPath=20shortcut=20?= =?UTF-8?q?=E2=80=94=20emscripten=20can't=20read=20those=20paths=20anyway?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 9 logs showed our locateFile fallback was returning the bunfs path (`B:\~BUN\root\tree-sitter.wasm`), and emscripten then ENOENT'd on it. The sibling-of-execPath fallback I added in the previous commit never ran because the scriptDir branch above it took the `isBunEmbeddedPath` shortcut and returned early. The shortcut was based on a wrong assumption: that emscripten could read bunfs paths. It can't — emscripten's `readAsync` calls `fs.readFile` under the hood, and `fs.readFile('B:\~BUN\root\...')` fails the same way `fs.existsSync` does on those paths. Remove the shortcut. Now resolveTreeSitterWasm only returns paths that `fs.existsSync` confirms — which on Windows means we skip the bunfs scriptDir fallback and fall through to the `dirname(process.execPath)` sibling, where the build script copied tree-sitter.wasm next to the binary. Verified locally: build copies wasm to bin/, --smoke-tree-sitter exits 0. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/code-map/src/init-node.ts | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/packages/code-map/src/init-node.ts b/packages/code-map/src/init-node.ts index 24a9189e23..66ca85fa70 100644 --- a/packages/code-map/src/init-node.ts +++ b/packages/code-map/src/init-node.ts @@ -30,21 +30,21 @@ function getEmbeddedWasmBinary(): Uint8Array | undefined { )[WASM_BINARY_GLOBAL_KEY] } -function isBunEmbeddedPath(filePath: string): boolean { - return filePath.replace(/\\/g, '/').includes('/~BUN/root/') -} - function resolveTreeSitterWasm(scriptDir: string): string { + // Only return paths that fs.existsSync confirms — emscripten will + // fs.readFile whatever we hand it, and bunfs internal paths (the + // `B:\~BUN\root\...` form on Windows) ENOENT under that read even + // though they look right. An earlier `isBunEmbeddedPath` shortcut + // assumed those paths were readable; they aren't. + const override = process.env[TREE_SITTER_WASM_ENV_VAR] - if (override) { - if (fs.existsSync(override) || isBunEmbeddedPath(override)) { - return override - } + if (override && fs.existsSync(override)) { + return override } - const fallback = path.join(scriptDir, 'tree-sitter.wasm') - if (fs.existsSync(fallback) || isBunEmbeddedPath(fallback)) { - return fallback + const scriptDirFallback = path.join(scriptDir, 'tree-sitter.wasm') + if (fs.existsSync(scriptDirFallback)) { + return scriptDirFallback } // Sibling file next to the running binary. The CLI ships @@ -56,7 +56,10 @@ function resolveTreeSitterWasm(scriptDir: string): string { // path later. emscripten calls this locateFile callback during // Parser.init's async work, by which time execPath has stabilized. try { - const sibling = path.join(path.dirname(process.execPath), 'tree-sitter.wasm') + const sibling = path.join( + path.dirname(process.execPath), + 'tree-sitter.wasm', + ) if (fs.existsSync(sibling)) { return sibling } From 31ce7752486628c3f5f1637df9a008739aa0d25d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:09:39 +0000 Subject: [PATCH 32/36] Bump version to 1.0.665 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 89caaee578..318f6b291a 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.664", + "version": "1.0.665", "description": "AI coding agent", "license": "MIT", "bin": { From 633cddde0254cb10d3b6bbc318d534e77b436f98 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:09:55 +0000 Subject: [PATCH 33/36] Bump Freebuff version to 0.0.73 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index b671661d19..05d070015e 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.72", + "version": "0.0.73", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From c77e79f3649c6d5d442e3b56b171ef5a09bba187 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 4 May 2026 03:15:44 -0700 Subject: [PATCH 34/36] Smoke handler: also fall back to sibling-of-execPath lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 10 still failed Windows because the smoke handler in main() doesn't go through init-node's locateFile callback at all — it calls Parser.init directly, so my init-node sibling fallback (rounds 9-10) never runs during the smoke step. Diagnostic confirmed: at main() time, process.execPath is the disk path on Windows AND the sibling tree-sitter.wasm exists right next to it. Pre-init couldn't reach the file (execPath was bunfs at that phase), so wasmBinary and wasmPath were both empty when smoke ran. Add the sibling lookup directly to the smoke handler, gated on those being empty. By main() time the disk path is reliable, so fs.existsSync(dirname(execPath) + 'tree-sitter.wasm') resolves correctly and we have something to feed Parser.init. Real users (no --smoke-tree-sitter flag) still go through the init-node sibling fallback in the SDK's eager Parser.init — that's unaffected by this change. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/index.tsx | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 549d989380..4eebfa9696 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -222,23 +222,34 @@ async function main(): Promise { try { const { Parser } = await import('web-tree-sitter') - if (wasmBinary) { - await Parser.init({ wasmBinary }) + // Pick the best wasm source available, falling back to the + // sibling-of-execPath lookup if pre-init couldn't reach it. By + // main() time process.execPath has stabilized to the disk path + // even on Windows, where it was the bunfs path during pre-init. + let effectiveBinary = wasmBinary + let effectivePath = wasmPath + if (!effectiveBinary && !effectivePath && fs.existsSync(siblingPath)) { + effectivePath = siblingPath + effectiveBinary = new Uint8Array(fs.readFileSync(siblingPath)) + } + + if (effectiveBinary) { + await Parser.init({ wasmBinary: effectiveBinary }) // Marker grepped by cli/scripts/smoke-binary.ts — keep this exact text. console.log( - `tree-sitter smoke ok (wasmBinary, ${wasmBinary.byteLength} bytes)`, + `tree-sitter smoke ok (wasmBinary, ${effectiveBinary.byteLength} bytes)`, ) - } else if (wasmPath) { + } else if (effectivePath) { await Parser.init({ locateFile: (name: string) => - name === 'tree-sitter.wasm' ? wasmPath : name, + name === 'tree-sitter.wasm' ? effectivePath! : name, }) - console.log(`tree-sitter smoke ok (locateFile, path=${wasmPath})`) + console.log(`tree-sitter smoke ok (locateFile, path=${effectivePath})`) } else { console.error( - 'tree-sitter smoke FAIL: pre-init published neither globalThis bytes ' + - 'nor an env path. Sibling tree-sitter.wasm not found relative to ' + - 'process.execPath. See diag above for the actual paths.', + 'tree-sitter smoke FAIL: no wasm available — pre-init published ' + + 'nothing and the sibling-of-execPath fallback also missed. See ' + + 'the diag above for paths.', ) process.exit(1) } From 0fbd844d513a07b568431b8f4f201999313f551d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:16:28 +0000 Subject: [PATCH 35/36] Bump version to 1.0.666 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 318f6b291a..bc40eabd62 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.665", + "version": "1.0.666", "description": "AI coding agent", "license": "MIT", "bin": { From 86ebd09d34451b1d5dc4eefb759d32281a91cb8c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 4 May 2026 10:16:40 +0000 Subject: [PATCH 36/36] Bump Freebuff version to 0.0.74 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 05d070015e..5c447ced50 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.73", + "version": "0.0.74", "description": "The world's strongest free coding agent", "license": "MIT", "bin": {