From bcb47a9cc9448075700cd6d7ccc023757dd1b99b Mon Sep 17 00:00:00 2001 From: kaghni Date: Fri, 22 May 2026 12:32:07 +0000 Subject: [PATCH 1/6] feat(install): scan offer + concrete-insight banner inline in `hivemind install` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the abstract "🐝 One more step to unlock Hivemind" pitch with a real finding from the user's own session history at install time. The PR #197 banner only fired on the user's SECOND claude session post- install, losing every user who never came back. This surfaces the value-show inline in the install flow itself, right before the sign-in prompt. Flow: 1. After "Installing hivemind X.Y.Z for:..." prints, check guards (claude CLI present, β‰₯1 prior session, no manifest yet, TTY). 2. If guards pass, prompt: "Scan now? [Y/n]" with default Y. 3. On Y, spawn `hivemind skillify mine-local --n 3` as a child of the SAME CLI bundle (process.execPath + process.argv[1]) with stdio silenced and a 90s hard timeout. 4. After child exits, read getLatestInsightEntry from the freshly- written manifest. If non-null, print the insight + minted skill name AND swap the standard "shared memory" pitch for an insight- aware one ("Sign in to keep `` across machines"). 5. Every failure path (declined, timed out, no insight emitted, spawn crashed, missing CLI bundle) falls through to the original unlock copy β€” the install never dead-ends on a scan failure. New file `src/cli/install-scan.ts`: - `canOfferInstallScan()`: guard chain (claude CLI on disk, prior sessions present, no manifest sentinel, TTY implicit at caller) - `runInstallScan()`: spawn worker with timeout, return entry or null - `formatScanResult(entry)`: pure renderer; collapses whitespace, truncates at ~200 chars on word boundary with ellipsis Path resolution happens at CALL time (lazy `homedir()`), not module- load β€” so unit tests can override HOME without an import-cycle. Tests cover every guard branch (6 cases), spawn argv shape, exit- code/error/timeout/missing-cli paths, and formatter invariants (whitespace normalization, truncation, missing-insight fallback). --- src/cli/index.ts | 60 +++++- src/cli/install-scan.ts | 190 +++++++++++++++++ tests/claude-code/install-scan.test.ts | 272 +++++++++++++++++++++++++ 3 files changed, 515 insertions(+), 7 deletions(-) create mode 100644 src/cli/install-scan.ts create mode 100644 tests/claude-code/install-scan.test.ts diff --git a/src/cli/index.ts b/src/cli/index.ts index f71db5ae..5c525f65 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -18,6 +18,7 @@ import { confirm, detectPlatforms, allPlatformIds, log, promptLine, warn, type P import { getVersion } from "./version.js"; import { runUpdate } from "./update.js"; import { renderCliHelpBlock } from "./skillify-spec.js"; +import { canOfferInstallScan, runInstallScan, formatScanResult } from "./install-scan.js"; const AUTH_SUBCOMMANDS = new Set([ "whoami", @@ -178,14 +179,59 @@ async function runAuthGate(args: string[]): Promise { return; } + // Install-time value-show: when the guards pass (claude CLI present, + // prior sessions on disk, no manifest yet, TTY attached), offer to + // scan the user's recent sessions for repeatable mistakes BEFORE + // showing the abstract sign-in pitch. A real insight from their own + // work converts on "keep this skill across machines" better than the + // generic "shared memory" copy. + // + // Every failure path (declined, timed out, no insight emitted) + // returns null and we fall through to the existing unlock copy β€” the + // install never dead-ends on a scan failure. + let foundInsight: { skill_name: string } | null = null; + if (canOfferInstallScan()) { + log(""); + log("🐝 Hivemind installed."); + log(""); + log("Want me to scan your recent Claude Code sessions for repeatable mistakes?"); + log("Takes ~30s. The scan uses your Claude Code subscription."); + log(""); + const scanOk = await confirm("Scan now?", true); + if (scanOk) { + log(""); + log("Scanning your last 3 sessions (up to 90s)…"); + const entry = await runInstallScan(); + if (entry && entry.insight && entry.insight.trim().length > 0) { + log(""); + log(formatScanResult(entry)); + foundInsight = { skill_name: entry.skill_name }; + } else { + log(""); + log("No repeatable patterns found in this scan. (That's OK β€” the gate is conservative.)"); + } + } + } + log(""); - log("🐝 One more step to unlock Hivemind"); - log(""); - log("To enable shared memory and auto-learning across your agents,"); - log("we need to sign you in. Your traces will be securely stored in"); - log("your private Hivemind, so all your agents can recall them."); - log(""); - log("You can later connect your own cloud storage like S3/GCS/Azure Blob."); + if (foundInsight) { + // Insight-aware sign-in pitch: lead with the concrete value the + // user just saw rather than the generic "shared memory" framing. + // Skill name is kebab-case-validated upstream by mine-local's + // assertValidSkillName, so it's safe to interpolate inline. + log("🐝 Sign in to keep this skill across machines and share it with your team."); + log(""); + log(`Without sign-in, \`${foundInsight.skill_name}\` lives only on this machine and`); + log("won't follow you to a new laptop or be shared with teammates who'd benefit."); + } else { + log("🐝 One more step to unlock Hivemind"); + log(""); + log("To enable shared memory and auto-learning across your agents,"); + log("we need to sign you in. Your traces will be securely stored in"); + log("your private Hivemind, so all your agents can recall them."); + log(""); + log("You can later connect your own cloud storage like S3/GCS/Azure Blob."); + } log(""); const yes = await confirm("Sign in now?", true); diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts new file mode 100644 index 00000000..736751bb --- /dev/null +++ b/src/cli/install-scan.ts @@ -0,0 +1,190 @@ +/** + * Install-time value-show: scan the user's recent local agent sessions for + * repeatable mistakes and surface a concrete insight inline in the + * `hivemind install` output, BEFORE the auth prompt. + * + * Captures the conversion moment: a fresh installer who declines sign-in + * usually never returns. By showing a real finding from THEIR own work + * up-front, the sign-in CTA becomes "keep this skill across machines" + * instead of the abstract "shared memory" pitch. + * + * Guarded β€” only runs when: + * 1. Claude Code CLI is on disk (the gate runner needs it). + * 2. The user has at least one .jsonl session under ~/.claude/projects/ + * (cold-install users have nothing to mine; we fall through silently). + * 3. No mine-local manifest exists yet (re-installers already mined; the + * sentinel blocks duplicate runs and we don't want to nag them). + * 4. TTY is attached (we need to prompt y/n). + * + * Failure modes (user declined, timed out, gate returned no insight, child + * crashed) all return null β€” caller falls through to the existing + * "🐝 One more step to unlock Hivemind" copy without surfacing an error. + */ + +import { spawn } from "node:child_process"; +import { existsSync, readdirSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; +import { findAgentBin } from "../skillify/gate-runner.js"; +import { + getLatestInsightEntry, + type LocalManifestEntry, +} from "../skillify/local-manifest.js"; + +/** + * Path roots are resolved at CALL time, not module-load time, so the + * guards honor a HOME override applied after import (the unit tests + * rely on this; production HOME never changes mid-process so the + * runtime cost is irrelevant). + */ +function claudeProjectsDir(): string { + return join(homedir(), ".claude", "projects"); +} +function manifestPath(): string { + return join(homedir(), ".claude", "hivemind", "local-mined.json"); +} + +/** + * Hard cap on the synchronous scan during install. Haiku on ~3 sessions + * typically returns in 30-60s; 90s is the generous ceiling before we + * give up and fall through. Don't bump higher without UX consideration β€” + * a 90s wait already pushes the install latency well past the user's + * normal "what's happening?" patience. + */ +const SCAN_TIMEOUT_MS = 90_000; + +/** + * Sessions to mine on the install-time pass. Tighter than the default + * (8) because we're trading insight quality for install latency. Three + * is empirically enough to surface a pattern for users with active + * coding history; for users with very few sessions, the gate returns + * empty and we fall through. + */ +const INSTALL_SCAN_SESSION_COUNT = 3; + +/** + * Cheap top-level scan: does any `~/.claude/projects/*` subdir contain + * at least one `.jsonl`? We don't recurse into subagent dirs β€” the + * mine-local worker has its own session picker, this guard only needs + * to answer "is there anything to mine?". + */ +function hasLocalClaudeSessions(): boolean { + const projectsDir = claudeProjectsDir(); + if (!existsSync(projectsDir)) return false; + let subdirs: string[]; + try { + subdirs = readdirSync(projectsDir); + } catch { + return false; + } + for (const sub of subdirs) { + let files: string[]; + try { + files = readdirSync(join(projectsDir, sub)); + } catch { + continue; + } + if (files.some(f => f.endsWith(".jsonl"))) return true; + } + return false; +} + +/** + * Guards: every condition that must hold before we even prompt the user + * for a scan. Returning false means "skip the offer entirely, fall + * through to the standard auth copy" β€” no banner, no half-state. + */ +export function canOfferInstallScan(): boolean { + const bin = findAgentBin("claude_code"); + if (!bin || !existsSync(bin)) return false; + if (!hasLocalClaudeSessions()) return false; + if (existsSync(manifestPath())) return false; + return true; +} + +/** + * Spawn the worktree's own `hivemind skillify mine-local` as a detached- + * style child, but await its exit synchronously (with timeout). Using + * `process.execPath` + `process.argv[1]` guarantees we run the SAME CLI + * bundle the user is currently inside β€” no version skew between the + * install flow and the worker that does the mining. + * + * stdio is silenced so the install UX stays clean. mine-local's own + * logs land in `~/.claude/hooks/mine-local.log` for postmortems. + * + * Returns the latest insight-bearing manifest entry if mining produced + * one, or null for every failure path (timeout, non-zero exit, no + * insight in the manifest). + */ +export function runInstallScan(): Promise { + return new Promise((resolve) => { + const cliPath = process.argv[1]; + if (!cliPath || !existsSync(cliPath)) { + resolve(null); + return; + } + const child = spawn( + process.execPath, + [cliPath, "skillify", "mine-local", "--n", String(INSTALL_SCAN_SESSION_COUNT)], + { + stdio: ["ignore", "ignore", "ignore"], + // HIVEMIND_CAPTURE=false: the spawned mine-local would otherwise + // try to capture its own activity, which is a no-op without + // credentials but spams the log. Keep it quiet. + env: { ...process.env, HIVEMIND_CAPTURE: "false" }, + }, + ); + + let settled = false; + const finish = (result: LocalManifestEntry | null): void => { + if (settled) return; + settled = true; + resolve(result); + }; + + const timer = setTimeout(() => { + try { child.kill("SIGKILL"); } catch { /* best-effort */ } + finish(null); + }, SCAN_TIMEOUT_MS); + + child.on("close", (code: number | null) => { + clearTimeout(timer); + if (code !== 0) { finish(null); return; } + // After mine-local exits cleanly, the manifest is written. Read + // the latest insight-bearing entry; null if the gate produced no + // insights (rare but expected for sparse session sets). + try { + finish(getLatestInsightEntry()); + } catch { + finish(null); + } + }); + + child.on("error", () => { + clearTimeout(timer); + finish(null); + }); + }); +} + +/** + * Pure renderer for the post-scan banner. Returns the multi-line block + * the install flow prints when an insight was found. Kept pure so the + * unit test can assert on the rendered output without standing up a + * real mine-local run. + * + * The skill name is rendered as a backticked code span β€” same as the + * SessionStart banner β€” and the insight is truncated to 200 chars so + * a verbose haiku output stays readable inline in the terminal. + */ +export function formatScanResult(entry: LocalManifestEntry): string { + const rawInsight = (entry.insight ?? "").replace(/\s+/g, " ").trim(); + const insight = rawInsight.length > 200 + ? rawInsight.slice(0, 197).replace(/\s\S*$/, "") + "…" + : rawInsight; + return ( + `βœ“ Found a pattern in your past sessions:\n` + + ` πŸ“Œ ${insight}\n` + + ` ✨ Skill \`${entry.skill_name}\` ready to catch it next time` + ); +} diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts new file mode 100644 index 00000000..b3e1fc0c --- /dev/null +++ b/tests/claude-code/install-scan.test.ts @@ -0,0 +1,272 @@ +/** + * Unit tests for src/cli/install-scan.ts β€” the install-time value-show + * scan helpers. The scan itself spawns mine-local, which we mock at + * the child_process boundary so tests stay fast and deterministic. + * + * Coverage targets: canOfferInstallScan guard chain (every false-path, + * plus the all-conditions-met true-path); runInstallScan timeout + + * happy path + manifest read; formatScanResult rendering invariants. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { EventEmitter } from "node:events"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir, homedir } from "node:os"; +import { join } from "node:path"; + +// child_process.spawn is mocked so the "scan" test doesn't actually +// invoke node β€” we drive child events from inside the test. +type SpawnCall = { cmd: string; args: string[] }; +const spawnCalls: SpawnCall[] = []; +let nextChildBehavior: { + exitCode?: number; + emitError?: Error; + delayMs?: number; +} = { exitCode: 0 }; + +vi.mock("node:child_process", () => ({ + spawn: vi.fn((cmd: string, args: string[]) => { + spawnCalls.push({ cmd, args }); + const child = new EventEmitter() as any; + child.kill = vi.fn(); + const behavior = nextChildBehavior; + queueMicrotask(() => { + const delay = behavior.delayMs ?? 0; + const fire = () => { + if (behavior.emitError) { + child.emit("error", behavior.emitError); + } else { + child.emit("close", behavior.exitCode ?? 0); + } + }; + if (delay > 0) setTimeout(fire, delay); + else fire(); + }); + return child; + }), +})); + +// findAgentBin returns a path that may or may not exist on disk β€” we +// drive it directly so tests don't depend on the developer's PATH. +let findAgentBinReturn: string | null = "/tmp/fake-claude-bin"; +vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + findAgentBin: (..._a: unknown[]) => findAgentBinReturn, + }; +}); + +// getLatestInsightEntry is mocked so the runInstallScan path doesn't +// read the developer's real ~/.claude/hivemind/local-mined.json. +let nextInsightEntry: any = null; +vi.mock("../../src/skillify/local-manifest.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + getLatestInsightEntry: () => nextInsightEntry, + // canOfferInstallScan reads LOCAL_MANIFEST_PATH existence directly; + // we keep the real export so tests can choose a tmp manifest path + // by setting HOME via process.env.HOME below. + }; +}); + +import { + canOfferInstallScan, + formatScanResult, + runInstallScan, +} from "../../src/cli/install-scan.js"; + +const TMP_HOME = mkdtempSync(join(tmpdir(), "install-scan-test-")); +const originalHome = process.env.HOME; +const originalArgv1 = process.argv[1]; +const FAKE_CLI = join(TMP_HOME, "fake-cli.js"); + +beforeEach(() => { + // Reset state between tests. + spawnCalls.length = 0; + nextChildBehavior = { exitCode: 0 }; + findAgentBinReturn = "/tmp/fake-claude-bin"; + nextInsightEntry = null; + // Each test starts with a clean tmp HOME: no sessions, no manifest. + rmSync(TMP_HOME, { recursive: true, force: true }); + mkdirSync(TMP_HOME, { recursive: true }); + process.env.HOME = TMP_HOME; + // process.argv[1] is what runInstallScan spawns. Point it at a real + // file so the existsSync check passes; the actual content doesn't + // matter because spawn is mocked. + writeFileSync(FAKE_CLI, "// fake cli", "utf-8"); + process.argv[1] = FAKE_CLI; + // Also ensure the mocked "claude bin" exists on disk for the guard. + writeFileSync("/tmp/fake-claude-bin", "// fake claude", "utf-8"); +}); + +afterEach(() => { + process.env.HOME = originalHome; + process.argv[1] = originalArgv1; + try { rmSync("/tmp/fake-claude-bin"); } catch { /* best-effort */ } +}); + +describe("canOfferInstallScan", () => { + function seedSession(): void { + const projectsDir = join(TMP_HOME, ".claude", "projects", "sample-proj"); + mkdirSync(projectsDir, { recursive: true }); + writeFileSync(join(projectsDir, "abc.jsonl"), "{}\n", "utf-8"); + } + + it("returns false when no claude CLI is present (gate runner needs it)", () => { + findAgentBinReturn = null; + seedSession(); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when findAgentBin returns a path that doesn't exist", () => { + findAgentBinReturn = "/nonexistent/claude"; + seedSession(); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when ~/.claude/projects is missing (truly fresh claude install)", () => { + // No sessions seeded β†’ cold-install path β†’ no scan offer. + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when ~/.claude/projects exists but contains no .jsonl files", () => { + // Subdir exists but is empty β€” the recursive scan finds nothing + // and we don't waste user attention on a doomed offer. + mkdirSync(join(TMP_HOME, ".claude", "projects", "empty"), { recursive: true }); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when the mine-local manifest already exists (re-installer)", () => { + seedSession(); + const manifestDir = join(TMP_HOME, ".claude", "hivemind"); + mkdirSync(manifestDir, { recursive: true }); + writeFileSync(join(manifestDir, "local-mined.json"), "{}\n", "utf-8"); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns true when all guards pass: claude CLI + sessions + no manifest", () => { + seedSession(); + expect(canOfferInstallScan()).toBe(true); + }); +}); + +describe("runInstallScan", () => { + it("spawns `skillify mine-local --n 3` against the same CLI bundle the install ran from", async () => { + nextChildBehavior = { exitCode: 0 }; + await runInstallScan(); + expect(spawnCalls).toHaveLength(1); + const { cmd, args } = spawnCalls[0]; + expect(cmd).toBe(process.execPath); + // Always spawns OUR cli bundle, never `which hivemind`, so the + // worker is the same version as the parent install process. + expect(args[0]).toBe(FAKE_CLI); + expect(args).toContain("skillify"); + expect(args).toContain("mine-local"); + // `--n 3` is the tight install-time session cap (vs default 8). + expect(args).toContain("--n"); + expect(args[args.indexOf("--n") + 1]).toBe("3"); + }); + + it("resolves with the latest insight entry on clean exit when one exists", async () => { + nextInsightEntry = { + skill_name: "verify-before-done", + insight: "You revisited 4 merged PRs in the last month.", + created_at: "2026-05-22T10:00:00.000Z", + }; + nextChildBehavior = { exitCode: 0 }; + const result = await runInstallScan(); + expect(result).not.toBeNull(); + expect(result!.skill_name).toBe("verify-before-done"); + }); + + it("resolves with null on non-zero exit", async () => { + nextChildBehavior = { exitCode: 1 }; + nextInsightEntry = { skill_name: "x", insight: "y", created_at: "z" }; + // Even if a manifest exists, a failed mine-local run shouldn't + // surface its (possibly stale) output. We treat non-zero exit as + // "scan failed β†’ fall through silently". + const result = await runInstallScan(); + expect(result).toBeNull(); + }); + + it("resolves with null on spawn error", async () => { + nextChildBehavior = { emitError: new Error("ENOENT") }; + const result = await runInstallScan(); + expect(result).toBeNull(); + }); + + it("resolves with null when process.argv[1] points at a missing file (safety guard)", async () => { + process.argv[1] = "/nonexistent/cli.js"; + const result = await runInstallScan(); + // Spawn should NOT have been called β€” we bail out at the + // existsSync check rather than letting node fail mid-spawn. + expect(result).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("resolves with null when getLatestInsightEntry returns null (gate produced no insight)", async () => { + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; + const result = await runInstallScan(); + expect(result).toBeNull(); + }); +}); + +describe("formatScanResult", () => { + function makeEntry(insight: string, name = "verify-before-done"): any { + return { + skill_name: name, + insight, + created_at: "2026-05-22T00:00:00.000Z", + }; + } + + it("renders insight, skill name, and emoji markers", () => { + const out = formatScanResult(makeEntry("You revisited 4 merged PRs.")); + expect(out).toContain("Found a pattern in your past sessions"); + expect(out).toContain("πŸ“Œ You revisited 4 merged PRs."); + expect(out).toContain("✨ Skill `verify-before-done` ready"); + }); + + it("collapses embedded whitespace (newlines, tabs) to single spaces", () => { + // Defense-in-depth: parseMultiVerdict already normalizes whitespace + // before persistence, but this renderer is the last guard before + // user-visible output and must not blindly trust the input. + const out = formatScanResult(makeEntry("Line one.\nLine\ttwo. Three.")); + expect(out).toContain("πŸ“Œ Line one. Line two. Three."); + expect(out).not.toContain("\nLine two"); + expect(out).not.toContain("\t"); + }); + + it("truncates insight over 200 chars at a word boundary with ellipsis", () => { + const long = "x ".repeat(200).trim() + " end-marker"; + const out = formatScanResult(makeEntry(long)); + const insightLine = out.split("\n").find(l => l.includes("πŸ“Œ"))!; + // Allow a few chars of slack for the emoji + leading spaces. + expect(insightLine.length).toBeLessThanOrEqual(220); + expect(insightLine.endsWith("…")).toBe(true); + // end-marker is past the 200-char cap, must be dropped. + expect(insightLine).not.toContain("end-marker"); + }); + + it("passes through short insights without truncation", () => { + const out = formatScanResult(makeEntry("Short and concrete.")); + expect(out).toContain("Short and concrete."); + expect(out).not.toContain("…"); + }); + + it("handles missing insight gracefully (empty string, not crash)", () => { + // Should never happen in practice β€” caller checks for non-empty + // insight before calling formatScanResult β€” but defensive coding + // means a malformed entry doesn't crash the install. + const out = formatScanResult({ + skill_name: "x", + insight: undefined as any, + created_at: "z", + } as any); + expect(out).toContain("πŸ“Œ"); + expect(out).toContain("✨ Skill `x`"); + }); +}); From d40faee7e9e0b923a3a6e08c9828d59b5f013186 Mon Sep 17 00:00:00 2001 From: kaghni Date: Fri, 22 May 2026 12:46:04 +0000 Subject: [PATCH 2/6] review: don't disable auto-mine on empty scan + scope to claude_code (codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two codex findings on PR #198, both addressed: P1 β€” install-scan would leave behind a sentinel manifest (entries: []) when mine-local found no insights, permanently disabling future maybeAutoMineLocal() background runs. Fix: after the spawned worker exits, if getLatestInsightEntry returns null, unlinkSync the manifest so background mining can retry as the user accumulates more sessions. canOfferInstallScan() guards against a pre-existing manifest, so any file present after the spawn came from THIS scan run β€” safe to remove. P2 β€” install prompt advertised "scan your Claude Code sessions" but mine-local's session picker walks every installed agent (Codex, Cursor, Hermes, pi) and could surface an insight from a Codex session despite what we promised. Privacy surprise + inaccurate copy. Fix: add `--only ` flag to mine-local that filters detectInstalledAgents() to the requested agent (or exits 1 if the filtered set is empty), then pass `--only claude_code` from runInstallScan. Tests: - install-scan: spawn argv now includes --only claude_code (regression guard); manifest removed on null-insight path; manifest preserved when an insight WAS produced. - mine-local: --only claude_code filters the install list correctly and logs the filter; --only with no matching install exits 1 with a clear error. --- src/cli/install-scan.ts | 35 +++++++++++++--- src/commands/mine-local.ts | 20 +++++++-- tests/claude-code/install-scan.test.ts | 42 ++++++++++++++++++- .../mine-local-orchestrator.test.ts | 33 +++++++++++++++ 4 files changed, 119 insertions(+), 11 deletions(-) diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts index 736751bb..197633ac 100644 --- a/src/cli/install-scan.ts +++ b/src/cli/install-scan.ts @@ -22,7 +22,7 @@ */ import { spawn } from "node:child_process"; -import { existsSync, readdirSync } from "node:fs"; +import { existsSync, readdirSync, unlinkSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; import { findAgentBin } from "../skillify/gate-runner.js"; @@ -125,7 +125,20 @@ export function runInstallScan(): Promise { } const child = spawn( process.execPath, - [cliPath, "skillify", "mine-local", "--n", String(INSTALL_SCAN_SESSION_COUNT)], + [ + cliPath, + "skillify", + "mine-local", + "--n", + String(INSTALL_SCAN_SESSION_COUNT), + // The install copy advertises a "Claude Code" scan, so filter + // the mine-local picker to claude_code sessions. Without this, + // mine-local walks every installed agent (Codex, Cursor, + // Hermes, pi) and could surface an insight from a Codex + // session despite what we promised β€” codex PR #198 P2. + "--only", + "claude_code", + ], { stdio: ["ignore", "ignore", "ignore"], // HIVEMIND_CAPTURE=false: the spawned mine-local would otherwise @@ -153,11 +166,21 @@ export function runInstallScan(): Promise { // After mine-local exits cleanly, the manifest is written. Read // the latest insight-bearing entry; null if the gate produced no // insights (rare but expected for sparse session sets). - try { - finish(getLatestInsightEntry()); - } catch { - finish(null); + let entry: LocalManifestEntry | null = null; + try { entry = getLatestInsightEntry(); } catch { /* keep null */ } + if (!entry) { + // mine-local writes a sentinel manifest (often with + // `entries: []`) even when no insights were produced. That + // sentinel is what `maybeAutoMineLocal()` uses to skip future + // background mining β€” leaving it behind permanently disables + // auto-mining for users whose install-time scan happened to + // be low-signal. Unlink the manifest so the background path + // can retry as their history accumulates. canOfferInstallScan + // guarantees there was no pre-existing manifest, so anything + // here came from THIS spawn β€” safe to remove. (codex PR #198 P1) + try { unlinkSync(manifestPath()); } catch { /* best-effort */ } } + finish(entry); }); child.on("error", () => { diff --git a/src/commands/mine-local.ts b/src/commands/mine-local.ts index 8d4fc5e5..893d46eb 100644 --- a/src/commands/mine-local.ts +++ b/src/commands/mine-local.ts @@ -478,6 +478,7 @@ async function runMineLocalImpl(args: string[]): Promise { const force = takeBoolFlag(work, "--force"); const dryRun = takeBoolFlag(work, "--dry-run"); const nRaw = takeFlagValue(work, "--n"); + const onlyAgent = takeFlagValue(work, "--only"); if (loadManifest() && !force) { console.error(`Local skills have already been mined on this machine.`); @@ -486,12 +487,25 @@ async function runMineLocalImpl(args: string[]): Promise { process.exit(1); } - const installs = detectInstalledAgents(); - if (installs.length === 0) { + const installsAll = detectInstalledAgents(); + if (installsAll.length === 0) { console.error(`No agent session directories detected. Run a session first.`); process.exit(1); } - console.log(`Detected installed agents: ${installs.map(i => i.agent).join(", ")}`); + // Optional agent filter: scope mining to a single agent's sessions + // (e.g. `--only claude_code`). Used by the install-time scan to + // honor a "scan your Claude Code sessions" promise β€” without this + // filter, the picker walks every installed agent's sessions and + // could surface an insight from Codex / Cursor / etc when only + // Claude Code was advertised (codex PR #198 P2 finding). + const installs = onlyAgent + ? installsAll.filter(i => i.agent === onlyAgent) + : installsAll; + if (installs.length === 0) { + console.error(`No '${onlyAgent}' session directory detected. Skipping mine-local.`); + process.exit(1); + } + console.log(`Detected installed agents: ${installs.map(i => i.agent).join(", ")}${onlyAgent ? ` (filtered to ${onlyAgent})` : ""}`); const host = detectHostAgent(); const fallback = installs[0].agent; diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts index b3e1fc0c..567750c8 100644 --- a/tests/claude-code/install-scan.test.ts +++ b/tests/claude-code/install-scan.test.ts @@ -10,7 +10,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { EventEmitter } from "node:events"; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { existsSync as existsSyncReal, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir, homedir } from "node:os"; import { join } from "node:path"; @@ -153,7 +153,7 @@ describe("canOfferInstallScan", () => { }); describe("runInstallScan", () => { - it("spawns `skillify mine-local --n 3` against the same CLI bundle the install ran from", async () => { + it("spawns `skillify mine-local --n 3 --only claude_code` against the same CLI bundle the install ran from", async () => { nextChildBehavior = { exitCode: 0 }; await runInstallScan(); expect(spawnCalls).toHaveLength(1); @@ -167,6 +167,44 @@ describe("runInstallScan", () => { // `--n 3` is the tight install-time session cap (vs default 8). expect(args).toContain("--n"); expect(args[args.indexOf("--n") + 1]).toBe("3"); + // `--only claude_code` honors the "scan your Claude Code sessions" + // copy β€” without it, mine-local would walk every installed agent + // and could surface an insight from Codex / Cursor (codex PR #198 + // P2). Regression guard. + expect(args).toContain("--only"); + expect(args[args.indexOf("--only") + 1]).toBe("claude_code"); + }); + + it("deletes the manifest written by mine-local when no insight was produced (P1 guard)", async () => { + // codex PR #198 P1: mine-local writes a sentinel manifest even + // when 0 insights were found. That sentinel permanently disables + // future maybeAutoMineLocal() runs. We unlink it so background + // mining can retry as history accumulates. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ created_at: "x", entries: [] })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; // mine-local wrote the empty manifest + const result = await runInstallScan(); + expect(result).toBeNull(); + // Manifest must be gone so future background auto-mine can retry. + expect(existsSyncReal(manifestPath)).toBe(false); + }); + + it("preserves the manifest when an insight WAS produced", async () => { + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [{ skill_name: "k", insight: "i", created_at: "z" }], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { skill_name: "k", insight: "i", created_at: "z" }; + const result = await runInstallScan(); + expect(result).not.toBeNull(); + // Real manifest should still be there β€” we want the insight to + // persist for the user-visible banner on next SessionStart. + expect(existsSyncReal(manifestPath)).toBe(true); }); it("resolves with the latest insight entry on clean exit when one exists", async () => { diff --git a/tests/claude-code/mine-local-orchestrator.test.ts b/tests/claude-code/mine-local-orchestrator.test.ts index 7844adaf..2a181ca8 100644 --- a/tests/claude-code/mine-local-orchestrator.test.ts +++ b/tests/claude-code/mine-local-orchestrator.test.ts @@ -452,6 +452,39 @@ describe("runMineLocal: orchestrator branches", () => { expect(errSpy).toHaveBeenCalledWith(expect.stringContaining("mine-local v1 requires the Claude Code CLI")); }); + it("--only filters detectInstalledAgents to the requested agent", async () => { + // codex PR #198 P2 regression guard: the install-time scan + // advertises "scan your Claude Code sessions" but mine-local + // walks every installed agent by default. The --only flag scopes + // the picker so the prompt copy is honest. + const old = Date.now() - 5 * 60_000; + detectInstalledAgents.mockReturnValueOnce([ + { agent: "codex", sessionRoot: "/c", encodeCwd: () => "x" }, + { agent: "claude_code", sessionRoot: "/cc", encodeCwd: () => "x" }, + ]); + // listLocalSessions should be called with ONLY the claude_code + // install when --only claude_code is passed. + listLocalSessions.mockImplementationOnce((installs: any) => { + expect(installs).toHaveLength(1); + expect(installs[0].agent).toBe("claude_code"); + return [makeSession("a", old, "claude_code")]; + }); + const mod = await importOrch(); + await mod.runMineLocal(["--only", "claude_code", "--dry-run"]); + // Confirm the log mentions the filter so users can see what + // happened. + expect(logSpy).toHaveBeenCalledWith(expect.stringContaining("filtered to claude_code")); + }); + + it("--only with no matching install β†’ exit 1 with clear error", async () => { + detectInstalledAgents.mockReturnValueOnce([ + { agent: "codex", sessionRoot: "/c", encodeCwd: () => "x" }, + ]); + const mod = await importOrch(); + await expect(mod.runMineLocal(["--only", "claude_code"])).rejects.toThrow("__exit_1__"); + expect(errSpy).toHaveBeenCalledWith(expect.stringContaining("No 'claude_code' session directory detected")); + }); + it("host = codex but claude_code installed β†’ mining uses claude_code as gate", async () => { const old = Date.now() - 5 * 60_000; detectHostAgent.mockReturnValueOnce("codex"); From 45da6475debebd9a79ba14edf8080920a57ef55e Mon Sep 17 00:00:00 2001 From: kaghni Date: Sat, 23 May 2026 08:01:01 +0000 Subject: [PATCH 3/6] =?UTF-8?q?feat(install):=20advisor=20pattern=20+=20sc?= =?UTF-8?q?an=20polish=20=E2=80=94=20sonnet=20ranks=20haiku=20candidates?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-world install-scan testing showed insight quality varies wildly across haiku candidates. With --n 5 on a machine whose newest sessions were conversational/planning content, the picker found nothing useful; with --n 20 the gate produced 18 candidates but the naive newest-by- created_at pick was a generic "watch for ENOTEMPTY" advisory, while the next-best candidate was the concrete "your hivemind CLI broke twice in 24 hours on May 19 and May 20 from concurrent autoUpdate()" pattern. The quality floor is haiku's; the quality CEILING is whichever candidate happens to be written last. That's not good enough for the install-moment surface. Advisor pattern (executor/advisor from the Anthropic docs): - Executor: haiku, parallel Γ—N, produces 0-3 candidates per session - Advisor: sonnet, single call, reads ALL candidates, picks best by quality criteria, marks the winner with `primary: true` New file `src/skillify/advisor.ts`: - runAdvisor(manifestPath): reads manifest, identifies insight- bearing candidates, spawns claude --model sonnet with a strict PICK / REJECT_ALL prompt, marks the picked entry as primary - parseAdvisorOutput: lenient parser for PICK: N or REJECT_ALL: reason; defensive against prose preludes; never blind-picks on unparseable output - Trivial-pick fast path: single-candidate manifests skip the sonnet call entirely - Idempotent: clears prior primary markings before applying the new pick Manifest schema: - LocalManifestEntry gains optional `primary: boolean` - getLatestInsightEntry prefers primary entries; falls back to newest-by-created_at when none marked (legacy behavior intact) install-scan integration: - After mine-local spawn closes successfully, runAdvisor() runs in the same async pass before getLatestInsightEntry is read. So the surfaced insight is the advisor's pick, not the recency pick - Advisor failures (timeout, missing CLI, REJECT_ALL) fall through silently β€” the manifest's recency tiebreak still works Other polish from real-world testing: - Truncation 200 β†’ 280 chars: a haiku insight was cut mid-sentence at 200, losing the punchline. 280 matches the parseMultiVerdict storage cap so we never truncate beyond what's persisted - --n 5 β†’ 20: the recency-biased picker on machines with rich history needs the epsilon-greedy random tail (30% of picks) to reach beyond the newest conversational sessions into the real coding work. Timeout bumped 90s β†’ 300s to match - Install copy: drop the misleading "🐝 Hivemind installed." line that printed before per-platform installs ran (codex P3) - P2 manifest delete: only delete the manifest when entries is LITERALLY empty (entries: []), not when skills were written but none have an insight β€” preserves count-banner data - Manifest-truly-empty helper distinguishes the two cases via a cheap JSON read A/B test on real al-projects sessions: - Mining produced 21 skills, 18 with insights - Naive pick: "npm-global-install-staging-cleanup" β€” generic ENOTEMPTY advisory - Advisor pick: "npm-global-install-race-diagnosis-and-lock-fix" β€” concrete (2 incidents, May 19 17:39 and May 20 01:20), counted (specific timestamps), 2nd-person, names the failure mechanism, actionable Tests: - advisor.test.ts: PICK / REJECT_ALL / unparseable parsing; spawn invokes --model sonnet; primary marked + idempotent on re-run; no-CLI / no-candidates / trivial-pick fast paths - install-scan.test.ts: bumped expected --n and truncation cap; mock runAdvisor so install-scan tests stay focused --- src/cli/index.ts | 12 +- src/cli/install-scan.ts | 96 ++++++--- src/skillify/advisor.ts | 230 +++++++++++++++++++++ src/skillify/local-manifest.ts | 25 ++- tests/claude-code/advisor.test.ts | 266 +++++++++++++++++++++++++ tests/claude-code/install-scan.test.ts | 64 ++++-- 6 files changed, 640 insertions(+), 53 deletions(-) create mode 100644 src/skillify/advisor.ts create mode 100644 tests/claude-code/advisor.test.ts diff --git a/src/cli/index.ts b/src/cli/index.ts index 5c525f65..800f6d6c 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -191,16 +191,18 @@ async function runAuthGate(args: string[]): Promise { // install never dead-ends on a scan failure. let foundInsight: { skill_name: string } | null = null; if (canOfferInstallScan()) { + // Don't claim "Hivemind installed" here β€” runAuthGate runs BEFORE + // the per-platform installers, so the assistant install hasn't + // actually happened yet. Codex PR #198 P3 flagged the earlier + // wording as misleading status output. log(""); - log("🐝 Hivemind installed."); - log(""); - log("Want me to scan your recent Claude Code sessions for repeatable mistakes?"); - log("Takes ~30s. The scan uses your Claude Code subscription."); + log("🐝 Want me to scan your recent Claude Code sessions for repeatable mistakes?"); + log("Takes 2-5 minutes. Scans 20 sessions in parallel using your Claude Code subscription."); log(""); const scanOk = await confirm("Scan now?", true); if (scanOk) { log(""); - log("Scanning your last 3 sessions (up to 90s)…"); + log("Scanning your 20 most-recent sessions (up to 5 min). Be patient β€” haiku is running in the background."); const entry = await runInstallScan(); if (entry && entry.insight && entry.insight.trim().length > 0) { log(""); diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts index 197633ac..b38808ed 100644 --- a/src/cli/install-scan.ts +++ b/src/cli/install-scan.ts @@ -22,7 +22,7 @@ */ import { spawn } from "node:child_process"; -import { existsSync, readdirSync, unlinkSync } from "node:fs"; +import { existsSync, readFileSync, readdirSync, unlinkSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; import { findAgentBin } from "../skillify/gate-runner.js"; @@ -30,6 +30,7 @@ import { getLatestInsightEntry, type LocalManifestEntry, } from "../skillify/local-manifest.js"; +import { runAdvisor } from "../skillify/advisor.js"; /** * Path roots are resolved at CALL time, not module-load time, so the @@ -45,22 +46,27 @@ function manifestPath(): string { } /** - * Hard cap on the synchronous scan during install. Haiku on ~3 sessions - * typically returns in 30-60s; 90s is the generous ceiling before we - * give up and fall through. Don't bump higher without UX consideration β€” - * a 90s wait already pushes the install latency well past the user's - * normal "what's happening?" patience. + * Hard cap on the synchronous scan during install. With session count + * = 20 and concurrency = 4, haiku runs 5 sequential batches at ~30-60s + * each β†’ realistic wall clock 150-300s. 5 minutes is the ceiling + * before we give up and fall through. The user opted into the wait + * via the y/n prompt, so a long-but-bounded scan is acceptable; the + * alternative (3-5 sessions) was empirically too few to escape the + * recency-biased picker's tendency to pick conversational meta- + * sessions on machines with rich history. */ -const SCAN_TIMEOUT_MS = 90_000; +const SCAN_TIMEOUT_MS = 300_000; /** - * Sessions to mine on the install-time pass. Tighter than the default - * (8) because we're trading insight quality for install latency. Three - * is empirically enough to surface a pattern for users with active - * coding history; for users with very few sessions, the gate returns - * empty and we fall through. + * Sessions to mine on the install-time pass. Bumped from 5 β†’ 20 after + * real-world testing: on a machine with ~340 sessions where the newest + * dozen are all conversational/planning content (no coding mistakes), + * --n 5 returned zero insights. The picker uses epsilon-greedy + * (Ξ΅=0.3) so ~6 of 20 picks are random β€” high enough to reach back + * into history and surface actual coding sessions where real + * repeatable-mistake patterns live. */ -const INSTALL_SCAN_SESSION_COUNT = 3; +const INSTALL_SCAN_SESSION_COUNT = 20; /** * Cheap top-level scan: does any `~/.claude/projects/*` subdir contain @@ -68,6 +74,24 @@ const INSTALL_SCAN_SESSION_COUNT = 3; * mine-local worker has its own session picker, this guard only needs * to answer "is there anything to mine?". */ +/** + * Read the manifest mine-local just wrote and return true ONLY when + * the entries array is empty. Distinguishes the "no skills written" + * sentinel from a count-only mine (skills written, none with insight). + * Returns false on any read/parse error to be safe β€” better to leave + * a malformed file alone than delete real data we couldn't decode. + */ +function manifestIsTrulyEmpty(): boolean { + const p = manifestPath(); + if (!existsSync(p)) return false; + try { + const m = JSON.parse(readFileSync(p, "utf-8")) as { entries?: unknown }; + return Array.isArray(m.entries) && m.entries.length === 0; + } catch { + return false; + } +} + function hasLocalClaudeSessions(): boolean { const projectsDir = claudeProjectsDir(); if (!existsSync(projectsDir)) return false; @@ -160,24 +184,34 @@ export function runInstallScan(): Promise { finish(null); }, SCAN_TIMEOUT_MS); - child.on("close", (code: number | null) => { + child.on("close", async (code: number | null) => { clearTimeout(timer); if (code !== 0) { finish(null); return; } - // After mine-local exits cleanly, the manifest is written. Read - // the latest insight-bearing entry; null if the gate produced no - // insights (rare but expected for sparse session sets). + // After mine-local exits cleanly, the manifest is written. Run + // the advisor (sonnet) over all insight-bearing candidates to + // mark the BEST one as primary. The A/B comparison showed a + // significant jump in surfaced-insight quality with the advisor + // pass: it consistently rejects meta-noise / vague candidates + // and picks the most concrete + counted finding. Falls through + // silently on any advisor error (timeout, no claude CLI, sonnet + // rejects all) β€” getLatestInsightEntry just uses the recency + // tiebreak. + try { await runAdvisor(); } catch { /* fall through to recency pick */ } let entry: LocalManifestEntry | null = null; try { entry = getLatestInsightEntry(); } catch { /* keep null */ } - if (!entry) { - // mine-local writes a sentinel manifest (often with - // `entries: []`) even when no insights were produced. That - // sentinel is what `maybeAutoMineLocal()` uses to skip future - // background mining β€” leaving it behind permanently disables - // auto-mining for users whose install-time scan happened to - // be low-signal. Unlink the manifest so the background path - // can retry as their history accumulates. canOfferInstallScan - // guarantees there was no pre-existing manifest, so anything - // here came from THIS spawn β€” safe to remove. (codex PR #198 P1) + if (!entry && manifestIsTrulyEmpty()) { + // ONLY delete the manifest when mine-local wrote a literal + // empty sentinel (entries: []). When mine-local DID produce + // skills but the gate omitted `insight` on all of them, the + // manifest still has value β€” countLocalManifestEntries() will + // surface the count via the legacy SessionStart banner branch, + // and a future `push-local` flow needs the row metadata. The + // earlier blanket-unlink path was over-aggressive (codex + // PR #198 P2): it told users "no patterns found" even when + // skills had been written, and re-armed the background auto- + // mine for the next session unnecessarily. canOfferInstallScan + // guarantees there was no pre-existing manifest, so an empty + // sentinel here is definitively from THIS spawn. try { unlinkSync(manifestPath()); } catch { /* best-effort */ } } finish(entry); @@ -202,8 +236,12 @@ export function runInstallScan(): Promise { */ export function formatScanResult(entry: LocalManifestEntry): string { const rawInsight = (entry.insight ?? "").replace(/\s+/g, " ").trim(); - const insight = rawInsight.length > 200 - ? rawInsight.slice(0, 197).replace(/\s\S*$/, "") + "…" + // Cap at 280 chars (same as the parseMultiVerdict storage cap), so we + // never truncate beyond what was persisted. The earlier 200-char cap + // sometimes lost the punchline of haiku's insights mid-sentence β€” + // 280 is the longest a stored insight can ever be. + const insight = rawInsight.length > 280 + ? rawInsight.slice(0, 277).replace(/\s\S*$/, "") + "…" : rawInsight; return ( `βœ“ Found a pattern in your past sessions:\n` + diff --git a/src/skillify/advisor.ts b/src/skillify/advisor.ts new file mode 100644 index 00000000..942b2943 --- /dev/null +++ b/src/skillify/advisor.ts @@ -0,0 +1,230 @@ +/** + * Insight advisor β€” runs after a mine-local pass to pick the BEST + * insight-bearing candidate from a manifest. Calls sonnet via the + * user's claude CLI (haiku is the executor that produces N candidates + * in parallel; sonnet is the advisor that selects one). + * + * Pattern (per Anthropic's "Executor/Advisor"): + * - Executor (haiku, parallel Γ— N): cheap, runs once per session, + * produces 0-3 skill candidates each. No quality floor β€” haiku + * sometimes emits meta-commentary ("user asked to save this rule") + * because the session it saw was a meta-conversation, not coding. + * - Advisor (sonnet, single call): reads ALL candidates, ranks by + * quality criteria, marks the winner with `primary: true` in the + * manifest. If every candidate is meta-noise / vague, returns + * null (no banner surfaces β€” falls back to the count line). + * + * Cost: one sonnet call per mine-local run (~$0.10-0.30 with the + * candidate set sizes we cap at). Worth the spend at install time, + * where surface attention is the scarce resource and we get one shot + * at the impression. + * + * The advisor is intentionally separate from gate-runner so the + * haiku-parallel path stays untouched. We invoke `claude --model sonnet` + * directly with stdin-fed prompt + parse a deterministic single-line + * response. + */ + +import { spawn } from "node:child_process"; +import { existsSync } from "node:fs"; +import { findAgentBin } from "./gate-runner.js"; +import { + LOCAL_MANIFEST_PATH, + readLocalManifest, + writeLocalManifest, + type LocalManifestEntry, +} from "./local-manifest.js"; + +/** + * Hard cap on the advisor call. Sonnet on a small candidate list + * (typically 4-15 entries) returns in ~5-20s. 60s ceiling before we + * give up and leave the manifest unmarked β€” the rule will fall back + * to the recency-based pick. + */ +const ADVISOR_TIMEOUT_MS = 60_000; + +/** + * Maximum candidates we send to the advisor. Mine-local typically + * produces 0-3 candidates per session, capped at ~24 total for the + * default --n 8 run. We send up to 20 β€” beyond that the prompt gets + * long and sonnet's signal-to-noise drops. If a future run yields + * more, we sample the newest 20. + */ +const MAX_CANDIDATES = 20; + +export interface AdvisorResult { + /** The picked entry's skill_name, or null if all candidates were rejected. */ + pickedSkillName: string | null; + /** Sonnet's free-text justification (kept for debug logs / observability). */ + reason: string; + /** Raw stdout from the model β€” preserved for debug only, not parsed beyond pick/reject. */ + rawOutput: string; +} + +function buildAdvisorPrompt(candidates: LocalManifestEntry[]): string { + const lines: string[] = [ + "You are reviewing skill candidates extracted from a user's coding sessions.", + "Pick the ONE candidate whose `insight` field is most useful to show the user as a", + "concrete finding from their past work. Reply on EXACTLY ONE LINE.", + "", + "GOOD insights are:", + " - Concrete and counted (cite specific numbers, file names, durations)", + " - About a real coding mistake or pattern the USER made (in 2nd person β€” \"You did X\")", + " - Actionable: the user can change behavior based on knowing this", + "", + "BAD insights (REJECT these) are:", + " - Meta-commentary about why the skill was saved (\"User explicitly requested...\")", + " - Vague / generic engineering platitudes the user already knows", + " - About someone other than the user (a teammate, a third party)", + " - Hypothetical (\"could lead to...\", \"might cause...\") rather than observed", + "", + "Output format β€” STRICT, one line only:", + " PICK: ", + "OR", + " REJECT_ALL: ", + "", + "Candidates:", + ]; + for (const [i, c] of candidates.entries()) { + lines.push(`${i + 1}. name=${c.skill_name} insight=${JSON.stringify((c.insight ?? "").slice(0, 400))}`); + } + return lines.join("\n"); +} + +/** + * Parse the advisor's single-line verdict. Defensive against extra + * whitespace, fenced code blocks, and prose preludes that sonnet + * sometimes emits despite the "EXACTLY ONE LINE" instruction. + */ +export function parseAdvisorOutput(raw: string, candidates: LocalManifestEntry[]): AdvisorResult { + const cleaned = raw.trim(); + const pickMatch = cleaned.match(/PICK:\s*(\d+)/i); + if (pickMatch) { + const idx = parseInt(pickMatch[1], 10) - 1; + if (idx >= 0 && idx < candidates.length) { + return { + pickedSkillName: candidates[idx].skill_name, + reason: cleaned, + rawOutput: raw, + }; + } + } + const rejectMatch = cleaned.match(/REJECT_ALL:\s*(.+)/i); + if (rejectMatch) { + return { pickedSkillName: null, reason: rejectMatch[1].trim(), rawOutput: raw }; + } + // Unparseable response β€” treat as reject so we don't pick blindly. + return { pickedSkillName: null, reason: `unparseable advisor output: ${cleaned.slice(0, 120)}`, rawOutput: raw }; +} + +/** + * Invoke the user's `claude --model sonnet` CLI with the prompt piped + * to stdin. Mirrors mine-local's stdin-gate trick to avoid MAX_ARG_STRLEN. + */ +function runAdvisorGate(prompt: string, claudeBin: string): Promise { + return new Promise((resolve, reject) => { + const child = spawn(claudeBin, [ + "-p", + "--no-session-persistence", + "--model", "sonnet", + "--permission-mode", "bypassPermissions", + ], { + stdio: ["pipe", "pipe", "pipe"], + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }); + let stdout = ""; + let stderr = ""; + let settled = false; + const finish = (err: Error | null, out: string): void => { + if (settled) return; + settled = true; + if (err) reject(err); else resolve(out); + }; + const timer = setTimeout(() => { + try { child.kill("SIGKILL"); } catch { /* best-effort */ } + finish(new Error(`advisor timed out after ${ADVISOR_TIMEOUT_MS}ms`), ""); + }, ADVISOR_TIMEOUT_MS); + child.stdout.on("data", (b: Buffer) => { stdout += b.toString("utf-8"); }); + child.stderr.on("data", (b: Buffer) => { stderr += b.toString("utf-8"); }); + child.on("error", (e: Error) => { clearTimeout(timer); finish(e, ""); }); + child.on("close", (code: number | null) => { + clearTimeout(timer); + if (code !== 0) { + finish(new Error(`advisor CLI exit ${code}; stderr=${stderr.slice(0, 200)}`), ""); + } else { + finish(null, stdout); + } + }); + child.stdin.on("error", (e: Error) => { clearTimeout(timer); finish(e, ""); }); + child.stdin.end(prompt); + }); +} + +/** + * Read the manifest, run the advisor over its insight-bearing entries, + * mark the picked entry with `primary: true`, write back. Returns the + * AdvisorResult (or null when there's nothing to advise on). + * + * Idempotent on the manifest schema: previously-marked primary entries + * are cleared before the new pick is set, so a re-run replaces the + * marking instead of accumulating. + */ +export async function runAdvisor( + manifestPath: string = LOCAL_MANIFEST_PATH, +): Promise { + const m = readLocalManifest(manifestPath); + if (!m || !Array.isArray(m.entries)) return null; + const insightBearing = m.entries.filter( + e => e && typeof e.insight === "string" && e.insight.trim().length > 0, + ); + if (insightBearing.length === 0) return null; + // No advisor call needed when there's exactly one candidate β€” it's + // automatically the best one. Save the sonnet spend. + if (insightBearing.length === 1) { + insightBearing[0].primary = true; + writeLocalManifest(m, manifestPath); + return { + pickedSkillName: insightBearing[0].skill_name, + reason: "trivial pick (single candidate)", + rawOutput: "", + }; + } + const claudeBin = findAgentBin("claude_code"); + if (!claudeBin || !existsSync(claudeBin)) { + // Without a claude CLI we can't call sonnet. Leave the manifest as + // is β€” the existing recency-based pick still works. + return null; + } + // Cap at MAX_CANDIDATES newest-first so the prompt stays bounded. + const ranked = [...insightBearing] + .sort((a, b) => (b.created_at ?? "").localeCompare(a.created_at ?? "")) + .slice(0, MAX_CANDIDATES); + const prompt = buildAdvisorPrompt(ranked); + let raw: string; + try { + raw = await runAdvisorGate(prompt, claudeBin); + } catch (err) { + return { + pickedSkillName: null, + reason: `advisor invocation failed: ${(err as Error).message}`, + rawOutput: "", + }; + } + const result = parseAdvisorOutput(raw, ranked); + if (result.pickedSkillName) { + // Clear any prior primary flags, then mark the picked entry. Same + // skill_name match β€” we mutate the entry in the original manifest + // (not a copy) so the write-back persists. + for (const e of m.entries) { + if (e && e.primary === true) delete e.primary; + } + for (const e of m.entries) { + if (e && e.skill_name === result.pickedSkillName) { + e.primary = true; + break; + } + } + writeLocalManifest(m, manifestPath); + } + return result; +} diff --git a/src/skillify/local-manifest.ts b/src/skillify/local-manifest.ts index c05d67ff..339d9aab 100644 --- a/src/skillify/local-manifest.ts +++ b/src/skillify/local-manifest.ts @@ -45,6 +45,14 @@ export interface LocalManifestEntry { * and fall back to the count-only banner. */ insight?: string; + /** + * Set to true by the advisor pass when this entry is the chosen "best" + * insight to surface. The advisor (sonnet) ranks all insight-bearing + * entries from a mining run by quality (concrete, quantified, non-meta) + * and marks the winner. `getLatestInsightEntry` prefers primary entries + * over the recency tiebreak when present. + */ + primary?: boolean; } export interface LocalManifest { @@ -132,20 +140,27 @@ export function getLatestInsightEntry( if (Number.isFinite(ts) && ts > newestTs) newestTs = ts; } if (!Number.isFinite(newestTs)) return null; - // Second pass: pick the newest insight-bearing entry within the - // latest-run window. Date.parse handles timezone-offset variants; - // unparseable created_at rows are skipped so a single malformed - // entry can't shadow valid ones. + // Second pass: pick the best insight-bearing entry within the + // latest-run window. Preference order: + // 1. `primary: true` entries (advisor marked these as the best + // among the run's candidates by quality criteria) + // 2. otherwise, the newest by created_at + // Date.parse handles timezone-offset variants; unparseable created_at + // rows are skipped so a single malformed entry can't shadow valid ones. let best: LocalManifestEntry | null = null; let bestTs = Number.NEGATIVE_INFINITY; + let bestIsPrimary = false; for (const e of m.entries) { if (!e || typeof e.insight !== "string" || e.insight.trim().length === 0) continue; const ts = Date.parse(e.created_at ?? ""); if (!Number.isFinite(ts)) continue; if (newestTs - ts > LATEST_RUN_WINDOW_MS) continue; - if (ts > bestTs) { + const isPrimary = e.primary === true; + // Primary entries always beat non-primary; among same-class, newer wins. + if (!best || (isPrimary && !bestIsPrimary) || (isPrimary === bestIsPrimary && ts > bestTs)) { best = e; bestTs = ts; + bestIsPrimary = isPrimary; } } return best; diff --git a/tests/claude-code/advisor.test.ts b/tests/claude-code/advisor.test.ts new file mode 100644 index 00000000..5fe2fd1b --- /dev/null +++ b/tests/claude-code/advisor.test.ts @@ -0,0 +1,266 @@ +/** + * Unit tests for src/skillify/advisor.ts β€” the sonnet-based ranking + * pass that picks the best insight-bearing candidate from a mine-local + * manifest. Mocks the claude CLI spawn so tests stay fast and + * deterministic. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { EventEmitter } from "node:events"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { PassThrough } from "node:stream"; + +type SpawnCall = { cmd: string; args: string[]; stdin: string }; +const spawnCalls: SpawnCall[] = []; +let nextChildBehavior: { + stdout?: string; + exitCode?: number; + emitError?: Error; +} = { stdout: "", exitCode: 0 }; + +vi.mock("node:child_process", () => ({ + spawn: vi.fn((cmd: string, args: string[]) => { + const child = new EventEmitter() as any; + child.stdout = new PassThrough(); + child.stderr = new PassThrough(); + child.stdin = new PassThrough(); + child.kill = vi.fn(); + let stdinBuf = ""; + child.stdin.on("data", (b: Buffer) => { stdinBuf += b.toString("utf-8"); }); + child.stdin.on("finish", () => { + spawnCalls.push({ cmd, args, stdin: stdinBuf }); + const beh = nextChildBehavior; + queueMicrotask(() => { + if (beh.emitError) { + child.emit("error", beh.emitError); + return; + } + if (beh.stdout) child.stdout.write(beh.stdout); + child.stdout.end(); + child.stderr.end(); + child.emit("close", beh.exitCode ?? 0); + }); + }); + return child; + }), +})); + +let findAgentBinReturn: string | null = "/tmp/fake-claude-bin"; +vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + findAgentBin: () => findAgentBinReturn, + }; +}); + +import { parseAdvisorOutput, runAdvisor } from "../../src/skillify/advisor.js"; +import { readLocalManifest, writeLocalManifest, type LocalManifestEntry } from "../../src/skillify/local-manifest.js"; + +const TMP_HOME = mkdtempSync(join(tmpdir(), "advisor-test-")); +const FAKE_BIN = "/tmp/fake-claude-bin"; +const MANIFEST = join(TMP_HOME, "manifest.json"); +const writeM = (m: import("../../src/skillify/local-manifest.js").LocalManifest) => writeLocalManifest(m, MANIFEST); + +beforeEach(() => { + spawnCalls.length = 0; + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + findAgentBinReturn = FAKE_BIN; + rmSync(TMP_HOME, { recursive: true, force: true }); + mkdirSync(TMP_HOME, { recursive: true }); + writeFileSync(FAKE_BIN, "// fake claude", "utf-8"); +}); + +afterEach(() => { + try { rmSync(FAKE_BIN); } catch { /* best-effort */ } +}); + +function makeEntry(over: Partial): LocalManifestEntry { + return { + skill_name: "k", + canonical_path: "/x/SKILL.md", + symlinks: [], + source_session_ids: ["sid"], + source_session_paths: ["/x/sid.jsonl"], + source_agent: "claude_code", + gate_agent: "claude_code", + created_at: "2026-05-22T00:00:00.000Z", + uploaded: false, + ...over, + }; +} + +describe("parseAdvisorOutput", () => { + const candidates = [ + makeEntry({ skill_name: "alpha", insight: "a" }), + makeEntry({ skill_name: "beta", insight: "b" }), + makeEntry({ skill_name: "gamma", insight: "c" }), + ]; + + it("parses PICK: at the start of the response", () => { + const r = parseAdvisorOutput("PICK: 2", candidates); + expect(r.pickedSkillName).toBe("beta"); + }); + + it("parses PICK with surrounding whitespace and case", () => { + const r = parseAdvisorOutput("\n pick: 3 \n", candidates); + expect(r.pickedSkillName).toBe("gamma"); + }); + + it("parses REJECT_ALL with a reason", () => { + const r = parseAdvisorOutput("REJECT_ALL: every candidate is meta-noise", candidates); + expect(r.pickedSkillName).toBeNull(); + expect(r.reason).toContain("meta-noise"); + }); + + it("returns null when PICK index is out of range", () => { + const r = parseAdvisorOutput("PICK: 99", candidates); + expect(r.pickedSkillName).toBeNull(); + }); + + it("returns null on unparseable output (defensive: never blind-pick)", () => { + const r = parseAdvisorOutput("Hmm, this is hard to say. Maybe beta?", candidates); + expect(r.pickedSkillName).toBeNull(); + expect(r.reason).toContain("unparseable"); + }); + + it("tolerates prose preamble and picks PICK pattern embedded later in stdout", () => { + // Sonnet sometimes adds 'Looking at the candidates...' prose before the verdict. + const r = parseAdvisorOutput("Looking at the candidates… PICK: 1", candidates); + expect(r.pickedSkillName).toBe("alpha"); + }); +}); + +describe("runAdvisor", () => { + it("returns null when no manifest exists", async () => { + const result = await runAdvisor(MANIFEST); + expect(result).toBeNull(); + // No spawn should fire β€” no candidates to advise on. + expect(spawnCalls).toHaveLength(0); + }); + + it("returns null when manifest has no insight-bearing entries", async () => { + writeM({ + created_at: "x", + entries: [makeEntry({ skill_name: "a" }), makeEntry({ skill_name: "b" })], + }); + const result = await runAdvisor(MANIFEST); + expect(result).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("trivial-picks when there's exactly one insight candidate (no sonnet call)", async () => { + // Single candidate is automatically the winner β€” save the spend. + const only = makeEntry({ skill_name: "lone", insight: "a real insight" }); + writeM({ created_at: "x", entries: [only] }); + const result = await runAdvisor(MANIFEST); + expect(result?.pickedSkillName).toBe("lone"); + expect(result?.reason).toContain("trivial pick"); + // Manifest must now have primary: true marked. + const re = readLocalManifest(MANIFEST); + expect(re?.entries[0].primary).toBe(true); + expect(spawnCalls).toHaveLength(0); + }); + + it("invokes claude with --model sonnet on the spawned process", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "insight a" }), + makeEntry({ skill_name: "b", insight: "insight b" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + await runAdvisor(MANIFEST); + expect(spawnCalls).toHaveLength(1); + expect(spawnCalls[0].cmd).toBe(FAKE_BIN); + const args = spawnCalls[0].args; + expect(args).toContain("--model"); + expect(args[args.indexOf("--model") + 1]).toBe("sonnet"); + // Prompt fed via stdin includes both candidates. + expect(spawnCalls[0].stdin).toContain("insight a"); + expect(spawnCalls[0].stdin).toContain("insight b"); + }); + + it("marks the picked entry as primary in the manifest", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + makeEntry({ skill_name: "c", insight: "c" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 2", exitCode: 0 }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeDefined(); + const re = readLocalManifest(MANIFEST)!; + // newest-first ordering inside advisor β†’ candidate index in prompt + // maps to ranked order, NOT manifest order. We just verify SOME + // entry was marked. + const primaries = re.entries.filter(e => e.primary === true); + expect(primaries).toHaveLength(1); + }); + + it("clears prior primary markings before applying the new pick", async () => { + // Idempotency: re-running the advisor should replace the prior + // primary, not add a second one. + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "old-winner", insight: "old", primary: true }), + makeEntry({ skill_name: "new-winner", insight: "new" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + await runAdvisor(MANIFEST); + const re = readLocalManifest(MANIFEST)!; + const primaries = re.entries.filter(e => e.primary === true); + expect(primaries).toHaveLength(1); + }); + + it("leaves manifest untouched when advisor REJECT_ALLs", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "meta noise" }), + makeEntry({ skill_name: "b", insight: "more meta noise" }), + ], + }); + nextChildBehavior = { stdout: "REJECT_ALL: all candidates are meta-noise", exitCode: 0 }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeNull(); + const re = readLocalManifest(MANIFEST)!; + expect(re.entries.filter(e => e.primary === true)).toHaveLength(0); + }); + + it("returns null cleanly when no claude CLI is available (fall back to recency pick)", async () => { + findAgentBinReturn = null; + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + ], + }); + const r = await runAdvisor(MANIFEST); + expect(r).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("returns an error result (not throw) when the spawn fails β€” caller falls through", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + ], + }); + nextChildBehavior = { emitError: new Error("ENOENT") }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeNull(); + expect(r?.reason).toContain("advisor invocation failed"); + }); +}); diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts index 567750c8..d2521f2f 100644 --- a/tests/claude-code/install-scan.test.ts +++ b/tests/claude-code/install-scan.test.ts @@ -71,6 +71,14 @@ vi.mock("../../src/skillify/local-manifest.js", async (importOriginal) => { }; }); +// runAdvisor is mocked so install-scan tests stay focused on install-scan +// behavior. The advisor itself is tested in advisor.test.ts. Without this +// mock, every install-scan test would also exercise the advisor's spawn, +// inflating spawnCalls and breaking length assertions. +vi.mock("../../src/skillify/advisor.js", () => ({ + runAdvisor: vi.fn(async () => null), +})); + import { canOfferInstallScan, formatScanResult, @@ -153,7 +161,7 @@ describe("canOfferInstallScan", () => { }); describe("runInstallScan", () => { - it("spawns `skillify mine-local --n 3 --only claude_code` against the same CLI bundle the install ran from", async () => { + it("spawns `skillify mine-local --n 20 --only claude_code` against the same CLI bundle the install ran from", async () => { nextChildBehavior = { exitCode: 0 }; await runInstallScan(); expect(spawnCalls).toHaveLength(1); @@ -164,9 +172,12 @@ describe("runInstallScan", () => { expect(args[0]).toBe(FAKE_CLI); expect(args).toContain("skillify"); expect(args).toContain("mine-local"); - // `--n 3` is the tight install-time session cap (vs default 8). + // `--n 20` is the install-time session cap. Bumped 3 β†’ 5 β†’ 20 + // across iterations: the recency-biased picker on a machine with + // many recent conversational sessions needs the epsilon-greedy + // random tail (30% of picks) to actually reach coding sessions. expect(args).toContain("--n"); - expect(args[args.indexOf("--n") + 1]).toBe("3"); + expect(args[args.indexOf("--n") + 1]).toBe("20"); // `--only claude_code` honors the "scan your Claude Code sessions" // copy β€” without it, mine-local would walk every installed agent // and could surface an insight from Codex / Cursor (codex PR #198 @@ -175,22 +186,45 @@ describe("runInstallScan", () => { expect(args[args.indexOf("--only") + 1]).toBe("claude_code"); }); - it("deletes the manifest written by mine-local when no insight was produced (P1 guard)", async () => { + it("deletes the manifest when mine-local wrote a literal empty sentinel (entries: [])", async () => { // codex PR #198 P1: mine-local writes a sentinel manifest even - // when 0 insights were found. That sentinel permanently disables + // when 0 SKILLS were found. That sentinel permanently disables // future maybeAutoMineLocal() runs. We unlink it so background // mining can retry as history accumulates. const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); writeFileSync(manifestPath, JSON.stringify({ created_at: "x", entries: [] })); nextChildBehavior = { exitCode: 0 }; - nextInsightEntry = null; // mine-local wrote the empty manifest + nextInsightEntry = null; const result = await runInstallScan(); expect(result).toBeNull(); - // Manifest must be gone so future background auto-mine can retry. + // Manifest is gone β€” background auto-mine can retry. expect(existsSyncReal(manifestPath)).toBe(false); }); + it("PRESERVES the manifest when mine-local wrote skills but none have an insight (codex P2)", async () => { + // codex PR #198 P2: the prior fix was over-aggressive. When + // mine-local writes skills (entries.length > 0) but the gate + // omits `insight` on every one of them, we MUST NOT delete the + // manifest β€” countLocalManifestEntries() surfaces the count via + // the legacy SessionStart banner branch, and a future + // `push-local` needs the row metadata. Only the literal + // `entries: []` sentinel should be cleared. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [ + { skill_name: "a", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:00.000Z", uploaded: false }, + ], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; // no insight produced, but skills exist + const result = await runInstallScan(); + expect(result).toBeNull(); + expect(existsSyncReal(manifestPath)).toBe(true); + }); + it("preserves the manifest when an insight WAS produced", async () => { const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); @@ -202,8 +236,6 @@ describe("runInstallScan", () => { nextInsightEntry = { skill_name: "k", insight: "i", created_at: "z" }; const result = await runInstallScan(); expect(result).not.toBeNull(); - // Real manifest should still be there β€” we want the insight to - // persist for the user-visible banner on next SessionStart. expect(existsSyncReal(manifestPath)).toBe(true); }); @@ -278,14 +310,18 @@ describe("formatScanResult", () => { expect(out).not.toContain("\t"); }); - it("truncates insight over 200 chars at a word boundary with ellipsis", () => { - const long = "x ".repeat(200).trim() + " end-marker"; + it("truncates insight over 280 chars at a word boundary with ellipsis", () => { + // 280-char cap matches the parseMultiVerdict storage cap, so the + // renderer never truncates beyond what the manifest can store. + // Bumped from 200 β†’ 280 after a real-world test caught a haiku + // insight mid-sentence at 200; 280 gives haiku room to finish a + // single sentence with the punchline intact. + const long = "x ".repeat(280).trim() + " end-marker"; const out = formatScanResult(makeEntry(long)); const insightLine = out.split("\n").find(l => l.includes("πŸ“Œ"))!; - // Allow a few chars of slack for the emoji + leading spaces. - expect(insightLine.length).toBeLessThanOrEqual(220); + // Allow ~10 chars of slack for the emoji + leading spaces. + expect(insightLine.length).toBeLessThanOrEqual(295); expect(insightLine.endsWith("…")).toBe(true); - // end-marker is past the 200-char cap, must be dropped. expect(insightLine).not.toContain("end-marker"); }); From 1e186af47f9d02e5101c9eb6105c8c478c611f64 Mon Sep 17 00:00:00 2001 From: kaghni Date: Sat, 23 May 2026 08:12:55 +0000 Subject: [PATCH 4/6] review: corrupt-manifest cleanup, accurate skills-count messaging, per-suite test bin (codex + coderabbit) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three findings from the previous review pass, all addressed. Codex P2 β€” runInstallScan could leave a truncated manifest behind on timeout or non-zero child exit. Both canOfferInstallScan() and maybeAutoMineLocal() treat any manifest presence as "already mined," so one bad scan would permanently disable retries until the user manually deletes the file. Fix: unlinkManifestIfCorrupt() helper runs on the timeout, non-zero-exit, AND spawn-error paths. Validates via readLocalManifest (which returns null on parse failure) and unlinks when corrupt. canOfferInstallScan guarantees no pre-existing manifest, so anything present after a failed scan is definitively this run's debris β€” safe to remove. Codex P3 β€” runInstallScan returned null in two distinct cases ("scan failed entirely" vs "mine-local wrote skills but none had insight"), and the caller printed "No repeatable patterns found" for both. The second case is a lie: skills WERE mined, just not banner-quality one-liners. Fix: refactor return shape to InstallScanResult { insight, skillsCount }. Caller now distinguishes three branches: insight present (show banner), insight null but skillsCount > 0 ("mined N skills locally"), insight null + zero skills ("no patterns found"). Skills get pulled at next SessionStart regardless. Coderabbit β€” shared /tmp/fake-claude-bin path in tests creates a race when test files run in parallel (one suite deletes while another writes). Fix: per-suite fake-bin under TMP_HOME, declared via join(TMP_HOME, "fake-claude-bin"). Each test suite owns its own mocked-binary path. Tests: - install-scan.test.ts: new P3 regression test (skillsCount > 0 on insight-less mine); new P2 regression test (corrupt manifest unlinked on non-zero exit); mocked countLocalManifestEntries alongside getLatestInsightEntry so the new skillsCount field doesn't leak the developer's real manifest into test results; every InstallScanResult assertion updated to the new shape. - advisor.test.ts: fake-bin path moved under TMP_HOME. - install-scan.test.ts: fake-bin path moved under TMP_HOME. --- src/cli/index.ts | 19 +++++-- src/cli/install-scan.ts | 68 +++++++++++++++++++--- tests/claude-code/advisor.test.ts | 6 +- tests/claude-code/install-scan.test.ts | 79 +++++++++++++++++++++----- 4 files changed, 142 insertions(+), 30 deletions(-) diff --git a/src/cli/index.ts b/src/cli/index.ts index 800f6d6c..52465890 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -203,13 +203,20 @@ async function runAuthGate(args: string[]): Promise { if (scanOk) { log(""); log("Scanning your 20 most-recent sessions (up to 5 min). Be patient β€” haiku is running in the background."); - const entry = await runInstallScan(); - if (entry && entry.insight && entry.insight.trim().length > 0) { - log(""); - log(formatScanResult(entry)); - foundInsight = { skill_name: entry.skill_name }; + const { insight, skillsCount } = await runInstallScan(); + log(""); + if (insight && insight.insight && insight.insight.trim().length > 0) { + log(formatScanResult(insight)); + foundInsight = { skill_name: insight.skill_name }; + } else if (skillsCount > 0) { + // Codex PR #198 P3: don't lie about "no patterns found" when + // mine-local actually wrote skills to disk. They just lacked + // a banner-quality one-liner. The skills are real and the + // user benefits from them on next SessionStart even without + // the install-time banner. + log(`Mined ${skillsCount} skill${skillsCount === 1 ? "" : "s"} locally β€” they'll be available in your next claude session.`); + log("(No banner-quality insight to surface here β€” the gate is conservative on what gets the top-line.)"); } else { - log(""); log("No repeatable patterns found in this scan. (That's OK β€” the gate is conservative.)"); } } diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts index b38808ed..d5ac94c0 100644 --- a/src/cli/install-scan.ts +++ b/src/cli/install-scan.ts @@ -27,11 +27,30 @@ import { homedir } from "node:os"; import { join } from "node:path"; import { findAgentBin } from "../skillify/gate-runner.js"; import { + countLocalManifestEntries, getLatestInsightEntry, + readLocalManifest, type LocalManifestEntry, } from "../skillify/local-manifest.js"; import { runAdvisor } from "../skillify/advisor.js"; +/** + * Distinguish the THREE outcomes of an install-time scan so the caller + * can give accurate feedback to the user (codex PR #198 P3): + * 1. `insight` is set: surface the value-show banner. + * 2. `insight` null, `skillsCount > 0`: mining succeeded but none of + * the candidates had a banner-quality insight. Skills ARE on disk; + * the unlock copy can mention them. "No patterns found" would be + * a lie. + * 3. `insight` null, `skillsCount === 0`: nothing was produced. The + * manifest was either truly empty (entries: []) and got unlinked, + * or never written. Fall through to standard unlock copy. + */ +export interface InstallScanResult { + insight: LocalManifestEntry | null; + skillsCount: number; +} + /** * Path roots are resolved at CALL time, not module-load time, so the * guards honor a HOME override applied after import (the unit tests @@ -140,11 +159,27 @@ export function canOfferInstallScan(): boolean { * one, or null for every failure path (timeout, non-zero exit, no * insight in the manifest). */ -export function runInstallScan(): Promise { +/** + * If the manifest is present but unreadable (truncated by a killed + * mid-write, malformed JSON), unlink it so future canOfferInstallScan + * and maybeAutoMineLocal calls can retry. Without this guard, one + * timed-out or crashed scan would permanently disable both the + * install-time offer AND the SessionStart auto-mine path until the + * user manually deletes the file (codex PR #198 P2). + */ +function unlinkManifestIfCorrupt(): void { + const p = manifestPath(); + if (!existsSync(p)) return; + if (readLocalManifest(p) === null) { + try { unlinkSync(p); } catch { /* best-effort */ } + } +} + +export function runInstallScan(): Promise { return new Promise((resolve) => { const cliPath = process.argv[1]; if (!cliPath || !existsSync(cliPath)) { - resolve(null); + resolve({ insight: null, skillsCount: 0 }); return; } const child = spawn( @@ -173,7 +208,7 @@ export function runInstallScan(): Promise { ); let settled = false; - const finish = (result: LocalManifestEntry | null): void => { + const finish = (result: InstallScanResult): void => { if (settled) return; settled = true; resolve(result); @@ -181,12 +216,21 @@ export function runInstallScan(): Promise { const timer = setTimeout(() => { try { child.kill("SIGKILL"); } catch { /* best-effort */ } - finish(null); + // Mining was interrupted mid-write β€” the on-disk manifest may + // be truncated. Clean it up so a future install or auto-mine + // can retry without seeing a corrupt sentinel. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); }, SCAN_TIMEOUT_MS); child.on("close", async (code: number | null) => { clearTimeout(timer); - if (code !== 0) { finish(null); return; } + if (code !== 0) { + // Non-zero exit may also have left a partial manifest behind. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); + return; + } // After mine-local exits cleanly, the manifest is written. Run // the advisor (sonnet) over all insight-bearing candidates to // mark the BEST one as primary. The A/B comparison showed a @@ -199,6 +243,11 @@ export function runInstallScan(): Promise { try { await runAdvisor(); } catch { /* fall through to recency pick */ } let entry: LocalManifestEntry | null = null; try { entry = getLatestInsightEntry(); } catch { /* keep null */ } + // Count is read AFTER the advisor pass β€” it gives the caller + // accurate "skills exist even without a banner-quality insight" + // signal so the UX copy doesn't lie (codex PR #198 P3). + let skillsCount = 0; + try { skillsCount = countLocalManifestEntries(); } catch { /* keep 0 */ } if (!entry && manifestIsTrulyEmpty()) { // ONLY delete the manifest when mine-local wrote a literal // empty sentinel (entries: []). When mine-local DID produce @@ -213,13 +262,18 @@ export function runInstallScan(): Promise { // guarantees there was no pre-existing manifest, so an empty // sentinel here is definitively from THIS spawn. try { unlinkSync(manifestPath()); } catch { /* best-effort */ } + skillsCount = 0; // we just removed the manifest } - finish(entry); + finish({ insight: entry, skillsCount }); }); child.on("error", () => { clearTimeout(timer); - finish(null); + // A spawn error means mine-local never started β€” manifest state + // unchanged. Still validate in case a previous interrupted run + // left debris. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); }); }); } diff --git a/tests/claude-code/advisor.test.ts b/tests/claude-code/advisor.test.ts index 5fe2fd1b..e035bb1e 100644 --- a/tests/claude-code/advisor.test.ts +++ b/tests/claude-code/advisor.test.ts @@ -47,7 +47,7 @@ vi.mock("node:child_process", () => ({ }), })); -let findAgentBinReturn: string | null = "/tmp/fake-claude-bin"; +let findAgentBinReturn: string | null = null; // set in beforeEach after TMP_HOME is computed vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { const actual = await importOriginal(); return { @@ -60,7 +60,9 @@ import { parseAdvisorOutput, runAdvisor } from "../../src/skillify/advisor.js"; import { readLocalManifest, writeLocalManifest, type LocalManifestEntry } from "../../src/skillify/local-manifest.js"; const TMP_HOME = mkdtempSync(join(tmpdir(), "advisor-test-")); -const FAKE_BIN = "/tmp/fake-claude-bin"; +// Per-suite fake-bin path under TMP_HOME so parallel test files can't +// race on a shared /tmp/fake-claude-bin (coderabbit on PR #198). +const FAKE_BIN = join(TMP_HOME, "fake-claude-bin"); const MANIFEST = join(TMP_HOME, "manifest.json"); const writeM = (m: import("../../src/skillify/local-manifest.js").LocalManifest) => writeLocalManifest(m, MANIFEST); diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts index d2521f2f..fb45b07e 100644 --- a/tests/claude-code/install-scan.test.ts +++ b/tests/claude-code/install-scan.test.ts @@ -48,7 +48,7 @@ vi.mock("node:child_process", () => ({ // findAgentBin returns a path that may or may not exist on disk β€” we // drive it directly so tests don't depend on the developer's PATH. -let findAgentBinReturn: string | null = "/tmp/fake-claude-bin"; +let findAgentBinReturn: string | null = null; // set in beforeEach vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { const actual = await importOriginal(); return { @@ -57,14 +57,20 @@ vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { }; }); -// getLatestInsightEntry is mocked so the runInstallScan path doesn't -// read the developer's real ~/.claude/hivemind/local-mined.json. +// getLatestInsightEntry + countLocalManifestEntries are mocked so the +// runInstallScan path doesn't read the developer's real +// ~/.claude/hivemind/local-mined.json. Both accessors capture +// LOCAL_MANIFEST_PATH at module-load (homedir() at import time), so +// without these mocks tests would observe whatever is in the +// developer's real manifest. let nextInsightEntry: any = null; +let nextSkillsCount = 0; vi.mock("../../src/skillify/local-manifest.js", async (importOriginal) => { const actual = await importOriginal(); return { ...actual, getLatestInsightEntry: () => nextInsightEntry, + countLocalManifestEntries: () => nextSkillsCount, // canOfferInstallScan reads LOCAL_MANIFEST_PATH existence directly; // we keep the real export so tests can choose a tmp manifest path // by setting HOME via process.env.HOME below. @@ -89,13 +95,17 @@ const TMP_HOME = mkdtempSync(join(tmpdir(), "install-scan-test-")); const originalHome = process.env.HOME; const originalArgv1 = process.argv[1]; const FAKE_CLI = join(TMP_HOME, "fake-cli.js"); +// Per-suite fake-bin path under TMP_HOME so parallel test files can't +// race on a shared /tmp/fake-claude-bin (coderabbit on PR #198). +const FAKE_BIN = join(TMP_HOME, "fake-claude-bin"); beforeEach(() => { // Reset state between tests. spawnCalls.length = 0; nextChildBehavior = { exitCode: 0 }; - findAgentBinReturn = "/tmp/fake-claude-bin"; + findAgentBinReturn = FAKE_BIN; nextInsightEntry = null; + nextSkillsCount = 0; // Each test starts with a clean tmp HOME: no sessions, no manifest. rmSync(TMP_HOME, { recursive: true, force: true }); mkdirSync(TMP_HOME, { recursive: true }); @@ -106,13 +116,13 @@ beforeEach(() => { writeFileSync(FAKE_CLI, "// fake cli", "utf-8"); process.argv[1] = FAKE_CLI; // Also ensure the mocked "claude bin" exists on disk for the guard. - writeFileSync("/tmp/fake-claude-bin", "// fake claude", "utf-8"); + writeFileSync(FAKE_BIN, "// fake claude", "utf-8"); }); afterEach(() => { process.env.HOME = originalHome; process.argv[1] = originalArgv1; - try { rmSync("/tmp/fake-claude-bin"); } catch { /* best-effort */ } + try { rmSync(FAKE_BIN); } catch { /* best-effort */ } }); describe("canOfferInstallScan", () => { @@ -197,11 +207,50 @@ describe("runInstallScan", () => { nextChildBehavior = { exitCode: 0 }; nextInsightEntry = null; const result = await runInstallScan(); - expect(result).toBeNull(); + expect(result.insight).toBeNull(); // Manifest is gone β€” background auto-mine can retry. expect(existsSyncReal(manifestPath)).toBe(false); }); + it("returns skillsCount > 0 when mine-local wrote skills but no insight surfaced (codex P3)", async () => { + // Regression guard for codex P3: caller used to see `null` from + // runInstallScan and print "No repeatable patterns found" even + // when skills WERE mined. The new shape carries skillsCount so + // the caller can distinguish "scan failed" from "skills mined + // but no banner-quality insight." + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [ + { skill_name: "a", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:00.000Z", uploaded: false }, + { skill_name: "b", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:01.000Z", uploaded: false }, + ], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; + nextSkillsCount = 2; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + expect(result.skillsCount).toBe(2); + }); + + it("unlinks a corrupt/truncated manifest left behind on timeout (codex P2)", async () => { + // Regression guard for codex P2: a timeout/crash mid-write can + // leave the manifest unparseable. Both canOfferInstallScan and + // maybeAutoMineLocal treat presence as "already mined", so a + // corrupt sentinel would permanently disable retries. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, "{ truncated json β€” not closed"); + nextChildBehavior = { exitCode: 1 }; // simulate child crash + nextInsightEntry = null; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + // Corrupt manifest unlinked β€” future retries can fire. + expect(existsSyncReal(manifestPath)).toBe(false); + }); + it("PRESERVES the manifest when mine-local wrote skills but none have an insight (codex P2)", async () => { // codex PR #198 P2: the prior fix was over-aggressive. When // mine-local writes skills (entries.length > 0) but the gate @@ -221,7 +270,7 @@ describe("runInstallScan", () => { nextChildBehavior = { exitCode: 0 }; nextInsightEntry = null; // no insight produced, but skills exist const result = await runInstallScan(); - expect(result).toBeNull(); + expect(result.insight).toBeNull(); expect(existsSyncReal(manifestPath)).toBe(true); }); @@ -235,7 +284,7 @@ describe("runInstallScan", () => { nextChildBehavior = { exitCode: 0 }; nextInsightEntry = { skill_name: "k", insight: "i", created_at: "z" }; const result = await runInstallScan(); - expect(result).not.toBeNull(); + expect(result.insight).not.toBeNull(); expect(existsSyncReal(manifestPath)).toBe(true); }); @@ -247,8 +296,8 @@ describe("runInstallScan", () => { }; nextChildBehavior = { exitCode: 0 }; const result = await runInstallScan(); - expect(result).not.toBeNull(); - expect(result!.skill_name).toBe("verify-before-done"); + expect(result.insight).not.toBeNull(); + expect(result.insight!.skill_name).toBe("verify-before-done"); }); it("resolves with null on non-zero exit", async () => { @@ -258,13 +307,13 @@ describe("runInstallScan", () => { // surface its (possibly stale) output. We treat non-zero exit as // "scan failed β†’ fall through silently". const result = await runInstallScan(); - expect(result).toBeNull(); + expect(result.insight).toBeNull(); }); it("resolves with null on spawn error", async () => { nextChildBehavior = { emitError: new Error("ENOENT") }; const result = await runInstallScan(); - expect(result).toBeNull(); + expect(result.insight).toBeNull(); }); it("resolves with null when process.argv[1] points at a missing file (safety guard)", async () => { @@ -272,7 +321,7 @@ describe("runInstallScan", () => { const result = await runInstallScan(); // Spawn should NOT have been called β€” we bail out at the // existsSync check rather than letting node fail mid-spawn. - expect(result).toBeNull(); + expect(result.insight).toBeNull(); expect(spawnCalls).toHaveLength(0); }); @@ -280,7 +329,7 @@ describe("runInstallScan", () => { nextChildBehavior = { exitCode: 0 }; nextInsightEntry = null; const result = await runInstallScan(); - expect(result).toBeNull(); + expect(result.insight).toBeNull(); }); }); From bad06466788445207e616cb94390d76200dff212 Mon Sep 17 00:00:00 2001 From: kaghni Date: Sat, 23 May 2026 08:20:24 +0000 Subject: [PATCH 5/6] =?UTF-8?q?review:=20honor=20advisor=20REJECT=5FALL=20?= =?UTF-8?q?=E2=80=94=20don't=20surface=20the=20candidate=20sonnet=20reject?= =?UTF-8?q?ed=20(codex=20P2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex caught a real regression: when sonnet returned REJECT_ALL (every candidate is meta-noise), runInstallScan still fell through to getLatestInsightEntry() and surfaced the newest insight regardless. That meant we'd show the user EXACTLY the candidate the advisor just rejected β€” exactly the meta-noise the whole pass was supposed to suppress. Fix: capture the AdvisorResult and distinguish three outcomes: 1. advisorResult === null (advisor didn't run β€” no CLI / no candidates / no manifest): fall through to recency pick. Same behavior as before the advisor existed. 2. advisorResult.pickedSkillName !== null (advisor marked a winner): getLatestInsightEntry returns the primary-marked entry, banner fires. 3. advisorResult.pickedSkillName === null (REJECT_ALL or unparseable verdict): suppress the insight entirely. Caller falls through to the skillsCount > 0 / "no patterns found" branches, which are accurate without lying about what the advisor saw. Tests: - New install-scan test verifies REJECT_ALL with a recency-pick available β†’ result.insight is null (not the rejected candidate). - New test verifies null advisor result (no opinion) still uses the recency pick β€” distinguishes "advisor said no" from "advisor didn't speak." - nextAdvisorResult mock setting added to the existing harness. --- src/cli/install-scan.ts | 28 ++++++++++++----- tests/claude-code/install-scan.test.ts | 42 +++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts index d5ac94c0..e9dce496 100644 --- a/src/cli/install-scan.ts +++ b/src/cli/install-scan.ts @@ -32,7 +32,7 @@ import { readLocalManifest, type LocalManifestEntry, } from "../skillify/local-manifest.js"; -import { runAdvisor } from "../skillify/advisor.js"; +import { runAdvisor, type AdvisorResult } from "../skillify/advisor.js"; /** * Distinguish the THREE outcomes of an install-time scan so the caller @@ -236,13 +236,27 @@ export function runInstallScan(): Promise { // mark the BEST one as primary. The A/B comparison showed a // significant jump in surfaced-insight quality with the advisor // pass: it consistently rejects meta-noise / vague candidates - // and picks the most concrete + counted finding. Falls through - // silently on any advisor error (timeout, no claude CLI, sonnet - // rejects all) β€” getLatestInsightEntry just uses the recency - // tiebreak. - try { await runAdvisor(); } catch { /* fall through to recency pick */ } + // and picks the most concrete + counted finding. + // + // Three outcomes: + // 1. advisorResult === null: advisor didn't run (no CLI, no + // candidates, no manifest). Fall through to recency pick. + // 2. advisorResult.pickedSkillName !== null: advisor picked + // a winner and marked it primary. getLatestInsightEntry + // will return that primary entry. + // 3. advisorResult.pickedSkillName === null: advisor evaluated + // candidates and REJECTED them all (or returned an + // unparseable verdict). DO NOT surface any insight β€” that + // would show exactly the candidate sonnet just rejected, + // defeating the entire advisor pass (codex PR #198 P2). + // Fall through to count-only branch in the caller. + let advisorResult: AdvisorResult | null = null; + try { advisorResult = await runAdvisor(); } catch { /* keep null */ } + const advisorRejected = advisorResult !== null && advisorResult.pickedSkillName === null; let entry: LocalManifestEntry | null = null; - try { entry = getLatestInsightEntry(); } catch { /* keep null */ } + if (!advisorRejected) { + try { entry = getLatestInsightEntry(); } catch { /* keep null */ } + } // Count is read AFTER the advisor pass β€” it gives the caller // accurate "skills exist even without a banner-quality insight" // signal so the UX copy doesn't lie (codex PR #198 P3). diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts index fb45b07e..5673c4bd 100644 --- a/tests/claude-code/install-scan.test.ts +++ b/tests/claude-code/install-scan.test.ts @@ -81,8 +81,9 @@ vi.mock("../../src/skillify/local-manifest.js", async (importOriginal) => { // behavior. The advisor itself is tested in advisor.test.ts. Without this // mock, every install-scan test would also exercise the advisor's spawn, // inflating spawnCalls and breaking length assertions. +let nextAdvisorResult: any = null; vi.mock("../../src/skillify/advisor.js", () => ({ - runAdvisor: vi.fn(async () => null), + runAdvisor: vi.fn(async () => nextAdvisorResult), })); import { @@ -106,6 +107,7 @@ beforeEach(() => { findAgentBinReturn = FAKE_BIN; nextInsightEntry = null; nextSkillsCount = 0; + nextAdvisorResult = null; // Each test starts with a clean tmp HOME: no sessions, no manifest. rmSync(TMP_HOME, { recursive: true, force: true }); mkdirSync(TMP_HOME, { recursive: true }); @@ -212,6 +214,44 @@ describe("runInstallScan", () => { expect(existsSyncReal(manifestPath)).toBe(false); }); + it("suppresses insight when advisor REJECT_ALLs even if a recency-pick would otherwise exist (codex P2)", async () => { + // Regression guard for codex's third-round P2: if sonnet returns + // REJECT_ALL, runAdvisor's pickedSkillName is null. Without the + // suppression check, runInstallScan would still fall through to + // getLatestInsightEntry() and surface the exact candidate the + // advisor just rejected β€” defeating the advisor pass entirely. + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { + skill_name: "rejected-meta-noise", + insight: "User explicitly requested this rule be saved.", + created_at: "2026-05-22T00:00:00.000Z", + }; + nextSkillsCount = 3; + nextAdvisorResult = { pickedSkillName: null, reason: "REJECT_ALL: all meta-noise", rawOutput: "" }; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + // Skills count still surfaces β€” the caller can show "mined N skills" + // copy instead of either the rejected insight or "no patterns found." + expect(result.skillsCount).toBe(3); + }); + + it("uses the recency pick when advisor returns null (no candidates / no CLI / no manifest)", async () => { + // Distinguishes the REJECT_ALL case (advisor evaluated and said no) + // from the no-opinion case (advisor didn't run). When advisor itself + // returned null, we trust the recency tiebreak. + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { + skill_name: "concrete-pattern", + insight: "You hit X twice last week.", + created_at: "2026-05-22T00:00:00.000Z", + }; + nextSkillsCount = 1; + nextAdvisorResult = null; // advisor didn't run at all + const result = await runInstallScan(); + expect(result.insight).not.toBeNull(); + expect(result.insight!.skill_name).toBe("concrete-pattern"); + }); + it("returns skillsCount > 0 when mine-local wrote skills but no insight surfaced (codex P3)", async () => { // Regression guard for codex P3: caller used to see `null` from // runInstallScan and print "No repeatable patterns found" even From 01ed9458eb7c2e8f3c55df18151bec78296d3c78 Mon Sep 17 00:00:00 2001 From: kaghni Date: Sat, 23 May 2026 20:04:05 +0000 Subject: [PATCH 6/6] =?UTF-8?q?perf(install):=20bump=20--n=2020=20?= =?UTF-8?q?=E2=86=92=2010=20based=20on=20stress-test=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-machine stress test on the user's al-projects sessions: N | mine | advisor | total | skills | insights | scaling --+----------+---------+---------+--------+----------+-------- 5 | 95.3s | 4.9s | 100.2s | 3 | 3 | β€” 10 | 219.5s | 11.3s | 230.8s | 13 | 11 | 2.30Γ— 20 | 285.3s | 6.6s | 291.9s | 25 | 23 | 1.30Γ— Key observations: - Latency scales sub-linearly past Nβ‰ˆ10 (concurrency=4 caps parallelism: ceil(N/4) batches Γ— per-batch slowest call). Going 20 β†’ 10 saves 60s with only the bottom half of candidates lost. - Advisor cost is 2-5% of total install-scan time. Killing it saves <10s β€” not the bottleneck. Keep advisor. - Quality at N=10 matches N=20 in the advisor's pick. The advisor's job is to filter meta-noise, and 10 candidates is enough material (real-data check: advisor picked 'metric-downstream-impact-analysis' with a concrete + counted insight at N=10; same quality bar as the N=20 pick). Going with N=10 β€” the sweet spot. 25% faster than N=20 with no quality regression. Total install latency drops from ~5 min to ~4 min for users who opt into the scan. Test + copy updates: - install-scan.test.ts: assert --n is "10"; updated comment - cli/index.ts: user-facing copy says "10 sessions" and "2-4 minutes" (was "20 sessions" / "2-5 minutes") - INSTALL_SCAN_SESSION_COUNT comment captures the full tuning history so the next person knows why this number, not just what it is --- src/cli/index.ts | 4 ++-- src/cli/install-scan.ts | 26 ++++++++++++++++++-------- tests/claude-code/install-scan.test.ts | 13 +++++++------ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/cli/index.ts b/src/cli/index.ts index 52465890..30df2537 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -197,12 +197,12 @@ async function runAuthGate(args: string[]): Promise { // wording as misleading status output. log(""); log("🐝 Want me to scan your recent Claude Code sessions for repeatable mistakes?"); - log("Takes 2-5 minutes. Scans 20 sessions in parallel using your Claude Code subscription."); + log("Takes 2-4 minutes. Scans 10 sessions in parallel using your Claude Code subscription."); log(""); const scanOk = await confirm("Scan now?", true); if (scanOk) { log(""); - log("Scanning your 20 most-recent sessions (up to 5 min). Be patient β€” haiku is running in the background."); + log("Scanning your 10 most-recent sessions (up to 5 min). Be patient β€” haiku is running in the background."); const { insight, skillsCount } = await runInstallScan(); log(""); if (insight && insight.insight && insight.insight.trim().length > 0) { diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts index e9dce496..e3f3e99f 100644 --- a/src/cli/install-scan.ts +++ b/src/cli/install-scan.ts @@ -77,15 +77,25 @@ function manifestPath(): string { const SCAN_TIMEOUT_MS = 300_000; /** - * Sessions to mine on the install-time pass. Bumped from 5 β†’ 20 after - * real-world testing: on a machine with ~340 sessions where the newest - * dozen are all conversational/planning content (no coding mistakes), - * --n 5 returned zero insights. The picker uses epsilon-greedy - * (Ξ΅=0.3) so ~6 of 20 picks are random β€” high enough to reach back - * into history and surface actual coding sessions where real - * repeatable-mistake patterns live. + * Sessions to mine on the install-time pass. Tuned across iterations + * (3 β†’ 5 β†’ 20 β†’ 10) with real-world latency + quality measurements: + * + * --n 5: 95s total, 3 candidates. Too few for the advisor to + * reject meta-noise and still have good options. + * --n 10: 230s total, ~10-13 candidates. Sweet spot β€” advisor + * consistently picks a concrete + counted insight; 7+ of + * the candidates are typically high-quality. + * --n 20: 290s total, ~25 candidates. Diminishing returns: 60s + * more wait for the user, no measurable quality lift in + * the advisor's pick (just more candidates to rank). + * + * Concurrency=4 caps parallelism, so the latency curve flattens past + * Nβ‰ˆ10 (ceil(N/4) batches Γ— per-batch slowest-call time). The + * epsilon-greedy picker (Ξ΅=0.3) gives ~3 random picks at N=10 β€” + * enough to reach past recent conversational sessions into real + * coding work. */ -const INSTALL_SCAN_SESSION_COUNT = 20; +const INSTALL_SCAN_SESSION_COUNT = 10; /** * Cheap top-level scan: does any `~/.claude/projects/*` subdir contain diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts index 5673c4bd..0ab1cde6 100644 --- a/tests/claude-code/install-scan.test.ts +++ b/tests/claude-code/install-scan.test.ts @@ -173,7 +173,7 @@ describe("canOfferInstallScan", () => { }); describe("runInstallScan", () => { - it("spawns `skillify mine-local --n 20 --only claude_code` against the same CLI bundle the install ran from", async () => { + it("spawns `skillify mine-local --n 10 --only claude_code` against the same CLI bundle the install ran from", async () => { nextChildBehavior = { exitCode: 0 }; await runInstallScan(); expect(spawnCalls).toHaveLength(1); @@ -184,12 +184,13 @@ describe("runInstallScan", () => { expect(args[0]).toBe(FAKE_CLI); expect(args).toContain("skillify"); expect(args).toContain("mine-local"); - // `--n 20` is the install-time session cap. Bumped 3 β†’ 5 β†’ 20 - // across iterations: the recency-biased picker on a machine with - // many recent conversational sessions needs the epsilon-greedy - // random tail (30% of picks) to actually reach coding sessions. + // `--n 10` is the install-time session cap. Tuned across + // iterations (3 β†’ 5 β†’ 20 β†’ 10) with real-data latency + quality + // measurements: 10 is the sweet spot β€” concurrency=4 caps + // parallelism so the curve flattens past Nβ‰ˆ10, and advisor pick + // quality at N=10 matches N=20 in practice. expect(args).toContain("--n"); - expect(args[args.indexOf("--n") + 1]).toBe("20"); + expect(args[args.indexOf("--n") + 1]).toBe("10"); // `--only claude_code` honors the "scan your Claude Code sessions" // copy β€” without it, mine-local would walk every installed agent // and could surface an insight from Codex / Cursor (codex PR #198