From cd8642edf1d8ca48f8357e9bdbc3180c16398089 Mon Sep 17 00:00:00 2001 From: khustup2 Date: Tue, 26 May 2026 01:16:26 +0000 Subject: [PATCH] fix(deps): build tree-sitter from source on linux-arm64 / Node >=22 tree-sitter@0.21.x ships no linux-arm64 prebuild and tree-sitter-typescript@0.23.x ships a mislabeled (x86-64) one, so on arm64 both must be compiled from source. Under Node >=22 that compile requires C++20, which tree-sitter's binding.gyp doesn't request, so a bare `npm install` hard-fails on arm64. - Move tree-sitter + tree-sitter-typescript to optionalDependencies so the expected arm64 build failure no longer aborts the whole install. - Add scripts/ensure-tree-sitter.mjs, a postinstall heal that compiles the bindings from source with CXXFLAGS=-std=c++20 when the shipped binaries fail to load. It removes the absent/wrong-arch prebuilds and rebuilds via node-gyp-build (build/Release is preferred at load time). No-op where prebuilds work (x64/darwin/CI); non-fatal with no toolchain. - Add `rebuild:native` script as a manual escape hatch. 
--- package-lock.json | 7 ++- package.json | 8 +++- scripts/ensure-tree-sitter.mjs | 82 ++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 scripts/ensure-tree-sitter.mjs diff --git a/package-lock.json b/package-lock.json index 2be1002d..58172126 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,6 +7,7 @@ "": { "name": "@deeplake/hivemind", "version": "0.7.54", + "hasInstallScript": true, "dependencies": { "@anthropic-ai/sdk": "^0.97.1", "@huggingface/transformers": "^3.0.0", @@ -14,8 +15,6 @@ "deeplake": "^0.3.30", "js-yaml": "^4.1.1", "just-bash": "^2.14.0", - "tree-sitter": "^0.21.1", - "tree-sitter-typescript": "^0.23.2", "yargs-parser": "^22.0.0", "zod": "^4.3.6" }, @@ -37,6 +36,10 @@ }, "engines": { "node": ">=22.0.0" + }, + "optionalDependencies": { + "tree-sitter": "^0.21.1", + "tree-sitter-typescript": "^0.23.2" } }, "node_modules/@anthropic-ai/sdk": { diff --git a/package.json b/package.json index 722f9e85..9d84f052 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,9 @@ "dup": "jscpd src", "audit:openclaw": "node scripts/audit-openclaw-bundle.mjs", "pack:check": "node scripts/pack-check.mjs", + "rebuild:native": "node scripts/ensure-tree-sitter.mjs", "ci": "npm run typecheck && npm run dup && npm test", + "postinstall": "node scripts/ensure-tree-sitter.mjs", "prepare": "husky && npm run build", "prepack": "npm run build" }, @@ -58,11 +60,13 @@ "deeplake": "^0.3.30", "js-yaml": "^4.1.1", "just-bash": "^2.14.0", - "tree-sitter": "^0.21.1", - "tree-sitter-typescript": "^0.23.2", "yargs-parser": "^22.0.0", "zod": "^4.3.6" }, + "optionalDependencies": { + "tree-sitter": "^0.21.1", + "tree-sitter-typescript": "^0.23.2" + }, "devDependencies": { "@types/js-yaml": "^4.0.9", "@types/node": "^25.0.0", diff --git a/scripts/ensure-tree-sitter.mjs b/scripts/ensure-tree-sitter.mjs new file mode 100644 index 00000000..05a381a1 --- /dev/null +++ b/scripts/ensure-tree-sitter.mjs @@ -0,0 +1,82 @@ 
#!/usr/bin/env node
// Ensures the native tree-sitter bindings are loadable on this platform / Node ABI.
//
// Why this exists: tree-sitter@0.21.x ships no linux-arm64 prebuild, and
// tree-sitter-typescript@0.23.x ships a mislabeled (x86-64) one. On linux-arm64
// both must be compiled from source, and under Node >=22 that compile requires C++20
// (tree-sitter@0.21's binding.gyp does not request it). tree-sitter is declared as an
// optionalDependency so this expected arm64 build failure does not abort `npm install`;
// this script then heals it afterwards.
//
// On platforms where the shipped prebuilds work (x64 / darwin / CI) this is a fast
// no-op and never touches anything. It is intentionally non-fatal: every path ends in
// exit code 0, and a failed heal only prints a warning.
//
// NOTE(review): package locations are fixed to <cwd>/node_modules/<pkg>. If this script
// ever runs where the packages are hoisted into an ancestor node_modules, they would be
// treated as missing and re-installed under <cwd> — confirm whether that case matters.
import { execFileSync, execSync } from 'node:child_process';
import { existsSync, readFileSync, rmSync } from 'node:fs';
import { createRequire } from 'node:module';
import { join } from 'node:path';

const ROOT = process.cwd();
const require = createRequire(`${ROOT}/`);
const PKGS = ['tree-sitter', 'tree-sitter-typescript'];

// CommonJS source of the smoke test, executed by a child `node -e` after a rebuild.
// It mirrors bindingsLoad(): load both addons and parse one statement.
const PROBE_SOURCE = [
  "const Parser = require('tree-sitter');",
  "const TS = require('tree-sitter-typescript').typescript;",
  'const parser = new Parser();',
  'parser.setLanguage(TS);',
  "parser.parse('const x = 1;');",
].join('\n');

/**
 * In-process smoke test: loads both native bindings and parses a trivial statement.
 *
 * @returns {boolean} true when both packages load and a parse succeeds, false on any error.
 */
function bindingsLoad() {
  try {
    const Parser = require('tree-sitter');
    const TS = require('tree-sitter-typescript').typescript;
    const parser = new Parser();
    parser.setLanguage(TS);
    parser.parse('const x = 1;');
    return true;
  } catch {
    return false;
  }
}

/**
 * Same smoke test as bindingsLoad(), but run in a brand-new Node process.
 *
 * Used after the rebuild: a module that loaded successfully during the first in-process
 * probe stays in this process's require cache, so re-probing here could report on the
 * old in-memory binary instead of what is now on disk.
 *
 * @returns {boolean} true when the child process exits 0, false otherwise.
 */
function bindingsLoadInFreshProcess() {
  try {
    execFileSync(process.execPath, ['--input-type=commonjs', '-e', PROBE_SOURCE], {
      cwd: ROOT, // `node -e` resolves bare specifiers from the working directory
      stdio: 'ignore',
    });
    return true;
  } catch {
    return false;
  }
}

/**
 * Reads the version ranges declared for dependencies / optionalDependencies in
 * <cwd>/package.json.
 *
 * @returns {Record<string, string>} name → range map; empty when the manifest cannot be
 *   read or parsed, so callers fall back to `latest` instead of aborting the install.
 */
function readDeclaredRanges() {
  try {
    const pkg = JSON.parse(readFileSync(join(ROOT, 'package.json'), 'utf8'));
    return { ...pkg.dependencies, ...pkg.optionalDependencies };
  } catch (err) {
    console.error('[ensure-tree-sitter] could not read package.json:', err?.message ?? err);
    return {};
  }
}

/**
 * Builds the environment for the nested npm calls: sets the recursion guard and, off
 * Windows, appends -std=c++20 to any CXXFLAGS already present.
 *
 * @returns {NodeJS.ProcessEnv} a copy of process.env with the additions applied.
 */
function buildEnv() {
  const env = { ...process.env, ENSURE_TS_RUNNING: '1' };
  if (process.platform !== 'win32') {
    // Node >=22 V8 headers require C++20; tree-sitter@0.21's binding.gyp doesn't request it.
    env.CXXFLAGS = `${process.env.CXXFLAGS ?? ''} -std=c++20`.trim();
  }
  return env;
}

/**
 * Heals the bindings when they fail to load: re-fetches dropped packages, wipes their
 * prebuilds/build output, rebuilds from source, then re-checks in a fresh process.
 * Never throws for npm failures; they are logged and the final check decides the message.
 */
function main() {
  if (process.env.ENSURE_TS_RUNNING) return; // recursion guard for the nested npm calls below
  if (bindingsLoad()) return; // healthy prebuild / prior build → nothing to do

  console.error('[ensure-tree-sitter] native bindings not loadable on this platform — building from source...');

  const env = buildEnv();
  const run = (cmd) => execSync(cmd, { stdio: 'inherit', env, cwd: ROOT });

  try {
    // 1. Re-fetch any package npm dropped — an optional dependency whose build failed is
    //    removed from node_modules. --ignore-scripts: fetch only, so the compile below is
    //    the single source of truth and the project build isn't triggered prematurely.
    const missing = PKGS.filter((n) => !existsSync(join(ROOT, 'node_modules', n, 'package.json')));
    if (missing.length > 0) {
      const declared = readDeclaredRanges();
      // Quote each spec: ranges may contain shell metacharacters (`^` is an escape in
      // cmd.exe; spaces, `<` and `>` split or redirect in any shell).
      const specs = missing.map((n) => `"${n}@${declared[n] ?? 'latest'}"`);
      run(`npm install ${specs.join(' ')} --no-save --ignore-scripts`);
    }

    // 2. Force a from-source compile. These packages install via node-gyp-build, which uses
    //    a local prebuild when present and otherwise compiles from source — no network. By
    //    removing the (absent or wrong-arch) prebuilds plus any stale build, the rebuild is
    //    guaranteed to compile locally, and node-gyp-build loads build/Release ahead of
    //    prebuilds, so the correct binary always wins.
    for (const n of PKGS) {
      const pkgDir = join(ROOT, 'node_modules', n);
      rmSync(join(pkgDir, 'prebuilds'), { recursive: true, force: true });
      rmSync(join(pkgDir, 'build'), { recursive: true, force: true });
    }
    run(`npm rebuild ${PKGS.join(' ')}`);
  } catch (err) {
    console.error('[ensure-tree-sitter] rebuild command failed:', err?.message ?? err);
  }

  if (bindingsLoadInFreshProcess()) {
    console.error('[ensure-tree-sitter] OK — bindings compiled from source and loadable.');
    return;
  }
  console.error(
    '[ensure-tree-sitter] WARNING: tree-sitter bindings still unavailable. ' +
      'Install a C/C++ toolchain and re-run `npm run rebuild:native`. (non-fatal)',
  );
}

main();
process.exit(0); // always succeed: a failed heal must not break `npm install`