diff --git a/src/cli/index.ts b/src/cli/index.ts index f71db5ae..30df2537 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -18,6 +18,7 @@ import { confirm, detectPlatforms, allPlatformIds, log, promptLine, warn, type P import { getVersion } from "./version.js"; import { runUpdate } from "./update.js"; import { renderCliHelpBlock } from "./skillify-spec.js"; +import { canOfferInstallScan, runInstallScan, formatScanResult } from "./install-scan.js"; const AUTH_SUBCOMMANDS = new Set([ "whoami", @@ -178,14 +179,68 @@ async function runAuthGate(args: string[]): Promise { return; } + // Install-time value-show: when the guards pass (claude CLI present, + // prior sessions on disk, no manifest yet, TTY attached), offer to + // scan the user's recent sessions for repeatable mistakes BEFORE + // showing the abstract sign-in pitch. A real insight from their own + // work converts on "keep this skill across machines" better than the + // generic "shared memory" copy. + // + // Every failure path (declined, timed out, no insight emitted) + // returns null and we fall through to the existing unlock copy β€” the + // install never dead-ends on a scan failure. + let foundInsight: { skill_name: string } | null = null; + if (canOfferInstallScan()) { + // Don't claim "Hivemind installed" here β€” runAuthGate runs BEFORE + // the per-platform installers, so the assistant install hasn't + // actually happened yet. Codex PR #198 P3 flagged the earlier + // wording as misleading status output. + log(""); + log("🐝 Want me to scan your recent Claude Code sessions for repeatable mistakes?"); + log("Takes 2-4 minutes. Scans 10 sessions in parallel using your Claude Code subscription."); + log(""); + const scanOk = await confirm("Scan now?", true); + if (scanOk) { + log(""); + log("Scanning your 10 most-recent sessions (up to 5 min). Be patient β€” haiku is running in the background."); + const { insight, skillsCount } = await runInstallScan(); + log(""); + if (insight && insight.insight && insight.insight.trim().length > 0) { + log(formatScanResult(insight)); + foundInsight = { skill_name: insight.skill_name }; + } else if (skillsCount > 0) { + // Codex PR #198 P3: don't lie about "no patterns found" when + // mine-local actually wrote skills to disk. They just lacked + // a banner-quality one-liner. The skills are real and the + // user benefits from them on next SessionStart even without + // the install-time banner. + log(`Mined ${skillsCount} skill${skillsCount === 1 ? "" : "s"} locally β€” they'll be available in your next claude session.`); + log("(No banner-quality insight to surface here β€” the gate is conservative on what gets the top-line.)"); + } else { + log("No repeatable patterns found in this scan. (That's OK β€” the gate is conservative.)"); + } + } + } + log(""); - log("🐝 One more step to unlock Hivemind"); - log(""); - log("To enable shared memory and auto-learning across your agents,"); - log("we need to sign you in. Your traces will be securely stored in"); - log("your private Hivemind, so all your agents can recall them."); - log(""); - log("You can later connect your own cloud storage like S3/GCS/Azure Blob."); + if (foundInsight) { + // Insight-aware sign-in pitch: lead with the concrete value the + // user just saw rather than the generic "shared memory" framing. + // Skill name is kebab-case-validated upstream by mine-local's + // assertValidSkillName, so it's safe to interpolate inline. + log("🐝 Sign in to keep this skill across machines and share it with your team."); + log(""); + log(`Without sign-in, \`${foundInsight.skill_name}\` lives only on this machine and`); + log("won't follow you to a new laptop or be shared with teammates who'd benefit."); + } else { + log("🐝 One more step to unlock Hivemind"); + log(""); + log("To enable shared memory and auto-learning across your agents,"); + log("we need to sign you in. Your traces will be securely stored in"); + log("your private Hivemind, so all your agents can recall them."); + log(""); + log("You can later connect your own cloud storage like S3/GCS/Azure Blob."); + } log(""); const yes = await confirm("Sign in now?", true); diff --git a/src/cli/install-scan.ts b/src/cli/install-scan.ts new file mode 100644 index 00000000..e3f3e99f --- /dev/null +++ b/src/cli/install-scan.ts @@ -0,0 +1,329 @@ +/** + * Install-time value-show: scan the user's recent local agent sessions for + * repeatable mistakes and surface a concrete insight inline in the + * `hivemind install` output, BEFORE the auth prompt. + * + * Captures the conversion moment: a fresh installer who declines sign-in + * usually never returns. By showing a real finding from THEIR own work + * up-front, the sign-in CTA becomes "keep this skill across machines" + * instead of the abstract "shared memory" pitch. + * + * Guarded β€” only runs when: + * 1. Claude Code CLI is on disk (the gate runner needs it). + * 2. The user has at least one .jsonl session under ~/.claude/projects/ + * (cold-install users have nothing to mine; we fall through silently). + * 3. No mine-local manifest exists yet (re-installers already mined; the + * sentinel blocks duplicate runs and we don't want to nag them). + * 4. TTY is attached (we need to prompt y/n). + * + * Failure modes (user declined, timed out, gate returned no insight, child + * crashed) all return null β€” caller falls through to the existing + * "🐝 One more step to unlock Hivemind" copy without surfacing an error. + */ + +import { spawn } from "node:child_process"; +import { existsSync, readFileSync, readdirSync, unlinkSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; +import { findAgentBin } from "../skillify/gate-runner.js"; +import { + countLocalManifestEntries, + getLatestInsightEntry, + readLocalManifest, + type LocalManifestEntry, +} from "../skillify/local-manifest.js"; +import { runAdvisor, type AdvisorResult } from "../skillify/advisor.js"; + +/** + * Distinguish the THREE outcomes of an install-time scan so the caller + * can give accurate feedback to the user (codex PR #198 P3): + * 1. `insight` is set: surface the value-show banner. + * 2. `insight` null, `skillsCount > 0`: mining succeeded but none of + * the candidates had a banner-quality insight. Skills ARE on disk; + * the unlock copy can mention them. "No patterns found" would be + * a lie. + * 3. `insight` null, `skillsCount === 0`: nothing was produced. The + * manifest was either truly empty (entries: []) and got unlinked, + * or never written. Fall through to standard unlock copy. + */ +export interface InstallScanResult { + insight: LocalManifestEntry | null; + skillsCount: number; +} + +/** + * Path roots are resolved at CALL time, not module-load time, so the + * guards honor a HOME override applied after import (the unit tests + * rely on this; production HOME never changes mid-process so the + * runtime cost is irrelevant). + */ +function claudeProjectsDir(): string { + return join(homedir(), ".claude", "projects"); +} +function manifestPath(): string { + return join(homedir(), ".claude", "hivemind", "local-mined.json"); +} + +/** + * Hard cap on the synchronous scan during install. With session count + * = 20 and concurrency = 4, haiku runs 5 sequential batches at ~30-60s + * each β†’ realistic wall clock 150-300s. 5 minutes is the ceiling + * before we give up and fall through. The user opted into the wait + * via the y/n prompt, so a long-but-bounded scan is acceptable; the + * alternative (3-5 sessions) was empirically too few to escape the + * recency-biased picker's tendency to pick conversational meta- + * sessions on machines with rich history. + */ +const SCAN_TIMEOUT_MS = 300_000; + +/** + * Sessions to mine on the install-time pass. Tuned across iterations + * (3 β†’ 5 β†’ 20 β†’ 10) with real-world latency + quality measurements: + * + * --n 5: 95s total, 3 candidates. Too few for the advisor to + * reject meta-noise and still have good options. + * --n 10: 230s total, ~10-13 candidates. Sweet spot β€” advisor + * consistently picks a concrete + counted insight; 7+ of + * the candidates are typically high-quality. + * --n 20: 290s total, ~25 candidates. Diminishing returns: 60s + * more wait for the user, no measurable quality lift in + * the advisor's pick (just more candidates to rank). + * + * Concurrency=4 caps parallelism, so the latency curve flattens past + * Nβ‰ˆ10 (ceil(N/4) batches Γ— per-batch slowest-call time). The + * epsilon-greedy picker (Ξ΅=0.3) gives ~3 random picks at N=10 β€” + * enough to reach past recent conversational sessions into real + * coding work. + */ +const INSTALL_SCAN_SESSION_COUNT = 10; + +/** + * Cheap top-level scan: does any `~/.claude/projects/*` subdir contain + * at least one `.jsonl`? We don't recurse into subagent dirs β€” the + * mine-local worker has its own session picker, this guard only needs + * to answer "is there anything to mine?". + */ +/** + * Read the manifest mine-local just wrote and return true ONLY when + * the entries array is empty. Distinguishes the "no skills written" + * sentinel from a count-only mine (skills written, none with insight). + * Returns false on any read/parse error to be safe β€” better to leave + * a malformed file alone than delete real data we couldn't decode. + */ +function manifestIsTrulyEmpty(): boolean { + const p = manifestPath(); + if (!existsSync(p)) return false; + try { + const m = JSON.parse(readFileSync(p, "utf-8")) as { entries?: unknown }; + return Array.isArray(m.entries) && m.entries.length === 0; + } catch { + return false; + } +} + +function hasLocalClaudeSessions(): boolean { + const projectsDir = claudeProjectsDir(); + if (!existsSync(projectsDir)) return false; + let subdirs: string[]; + try { + subdirs = readdirSync(projectsDir); + } catch { + return false; + } + for (const sub of subdirs) { + let files: string[]; + try { + files = readdirSync(join(projectsDir, sub)); + } catch { + continue; + } + if (files.some(f => f.endsWith(".jsonl"))) return true; + } + return false; +} + +/** + * Guards: every condition that must hold before we even prompt the user + * for a scan. Returning false means "skip the offer entirely, fall + * through to the standard auth copy" β€” no banner, no half-state. + */ +export function canOfferInstallScan(): boolean { + const bin = findAgentBin("claude_code"); + if (!bin || !existsSync(bin)) return false; + if (!hasLocalClaudeSessions()) return false; + if (existsSync(manifestPath())) return false; + return true; +} + +/** + * Spawn the worktree's own `hivemind skillify mine-local` as a detached- + * style child, but await its exit synchronously (with timeout). Using + * `process.execPath` + `process.argv[1]` guarantees we run the SAME CLI + * bundle the user is currently inside β€” no version skew between the + * install flow and the worker that does the mining. + * + * stdio is silenced so the install UX stays clean. mine-local's own + * logs land in `~/.claude/hooks/mine-local.log` for postmortems. + * + * Returns the latest insight-bearing manifest entry if mining produced + * one, or null for every failure path (timeout, non-zero exit, no + * insight in the manifest). + */ +/** + * If the manifest is present but unreadable (truncated by a killed + * mid-write, malformed JSON), unlink it so future canOfferInstallScan + * and maybeAutoMineLocal calls can retry. Without this guard, one + * timed-out or crashed scan would permanently disable both the + * install-time offer AND the SessionStart auto-mine path until the + * user manually deletes the file (codex PR #198 P2). + */ +function unlinkManifestIfCorrupt(): void { + const p = manifestPath(); + if (!existsSync(p)) return; + if (readLocalManifest(p) === null) { + try { unlinkSync(p); } catch { /* best-effort */ } + } +} + +export function runInstallScan(): Promise { + return new Promise((resolve) => { + const cliPath = process.argv[1]; + if (!cliPath || !existsSync(cliPath)) { + resolve({ insight: null, skillsCount: 0 }); + return; + } + const child = spawn( + process.execPath, + [ + cliPath, + "skillify", + "mine-local", + "--n", + String(INSTALL_SCAN_SESSION_COUNT), + // The install copy advertises a "Claude Code" scan, so filter + // the mine-local picker to claude_code sessions. Without this, + // mine-local walks every installed agent (Codex, Cursor, + // Hermes, pi) and could surface an insight from a Codex + // session despite what we promised β€” codex PR #198 P2. + "--only", + "claude_code", + ], + { + stdio: ["ignore", "ignore", "ignore"], + // HIVEMIND_CAPTURE=false: the spawned mine-local would otherwise + // try to capture its own activity, which is a no-op without + // credentials but spams the log. Keep it quiet. + env: { ...process.env, HIVEMIND_CAPTURE: "false" }, + }, + ); + + let settled = false; + const finish = (result: InstallScanResult): void => { + if (settled) return; + settled = true; + resolve(result); + }; + + const timer = setTimeout(() => { + try { child.kill("SIGKILL"); } catch { /* best-effort */ } + // Mining was interrupted mid-write β€” the on-disk manifest may + // be truncated. Clean it up so a future install or auto-mine + // can retry without seeing a corrupt sentinel. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); + }, SCAN_TIMEOUT_MS); + + child.on("close", async (code: number | null) => { + clearTimeout(timer); + if (code !== 0) { + // Non-zero exit may also have left a partial manifest behind. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); + return; + } + // After mine-local exits cleanly, the manifest is written. Run + // the advisor (sonnet) over all insight-bearing candidates to + // mark the BEST one as primary. The A/B comparison showed a + // significant jump in surfaced-insight quality with the advisor + // pass: it consistently rejects meta-noise / vague candidates + // and picks the most concrete + counted finding. + // + // Three outcomes: + // 1. advisorResult === null: advisor didn't run (no CLI, no + // candidates, no manifest). Fall through to recency pick. + // 2. advisorResult.pickedSkillName !== null: advisor picked + // a winner and marked it primary. getLatestInsightEntry + // will return that primary entry. + // 3. advisorResult.pickedSkillName === null: advisor evaluated + // candidates and REJECTED them all (or returned an + // unparseable verdict). DO NOT surface any insight β€” that + // would show exactly the candidate sonnet just rejected, + // defeating the entire advisor pass (codex PR #198 P2). + // Fall through to count-only branch in the caller. + let advisorResult: AdvisorResult | null = null; + try { advisorResult = await runAdvisor(); } catch { /* keep null */ } + const advisorRejected = advisorResult !== null && advisorResult.pickedSkillName === null; + let entry: LocalManifestEntry | null = null; + if (!advisorRejected) { + try { entry = getLatestInsightEntry(); } catch { /* keep null */ } + } + // Count is read AFTER the advisor pass β€” it gives the caller + // accurate "skills exist even without a banner-quality insight" + // signal so the UX copy doesn't lie (codex PR #198 P3). + let skillsCount = 0; + try { skillsCount = countLocalManifestEntries(); } catch { /* keep 0 */ } + if (!entry && manifestIsTrulyEmpty()) { + // ONLY delete the manifest when mine-local wrote a literal + // empty sentinel (entries: []). When mine-local DID produce + // skills but the gate omitted `insight` on all of them, the + // manifest still has value β€” countLocalManifestEntries() will + // surface the count via the legacy SessionStart banner branch, + // and a future `push-local` flow needs the row metadata. The + // earlier blanket-unlink path was over-aggressive (codex + // PR #198 P2): it told users "no patterns found" even when + // skills had been written, and re-armed the background auto- + // mine for the next session unnecessarily. canOfferInstallScan + // guarantees there was no pre-existing manifest, so an empty + // sentinel here is definitively from THIS spawn. + try { unlinkSync(manifestPath()); } catch { /* best-effort */ } + skillsCount = 0; // we just removed the manifest + } + finish({ insight: entry, skillsCount }); + }); + + child.on("error", () => { + clearTimeout(timer); + // A spawn error means mine-local never started β€” manifest state + // unchanged. Still validate in case a previous interrupted run + // left debris. + unlinkManifestIfCorrupt(); + finish({ insight: null, skillsCount: 0 }); + }); + }); +} + +/** + * Pure renderer for the post-scan banner. Returns the multi-line block + * the install flow prints when an insight was found. Kept pure so the + * unit test can assert on the rendered output without standing up a + * real mine-local run. + * + * The skill name is rendered as a backticked code span β€” same as the + * SessionStart banner β€” and the insight is truncated to 200 chars so + * a verbose haiku output stays readable inline in the terminal. + */ +export function formatScanResult(entry: LocalManifestEntry): string { + const rawInsight = (entry.insight ?? "").replace(/\s+/g, " ").trim(); + // Cap at 280 chars (same as the parseMultiVerdict storage cap), so we + // never truncate beyond what was persisted. The earlier 200-char cap + // sometimes lost the punchline of haiku's insights mid-sentence β€” + // 280 is the longest a stored insight can ever be. + const insight = rawInsight.length > 280 + ? rawInsight.slice(0, 277).replace(/\s\S*$/, "") + "…" + : rawInsight; + return ( + `βœ“ Found a pattern in your past sessions:\n` + + ` πŸ“Œ ${insight}\n` + + ` ✨ Skill \`${entry.skill_name}\` ready to catch it next time` + ); +} diff --git a/src/commands/mine-local.ts b/src/commands/mine-local.ts index 8d4fc5e5..893d46eb 100644 --- a/src/commands/mine-local.ts +++ b/src/commands/mine-local.ts @@ -478,6 +478,7 @@ async function runMineLocalImpl(args: string[]): Promise { const force = takeBoolFlag(work, "--force"); const dryRun = takeBoolFlag(work, "--dry-run"); const nRaw = takeFlagValue(work, "--n"); + const onlyAgent = takeFlagValue(work, "--only"); if (loadManifest() && !force) { console.error(`Local skills have already been mined on this machine.`); @@ -486,12 +487,25 @@ async function runMineLocalImpl(args: string[]): Promise { process.exit(1); } - const installs = detectInstalledAgents(); - if (installs.length === 0) { + const installsAll = detectInstalledAgents(); + if (installsAll.length === 0) { console.error(`No agent session directories detected. Run a session first.`); process.exit(1); } - console.log(`Detected installed agents: ${installs.map(i => i.agent).join(", ")}`); + // Optional agent filter: scope mining to a single agent's sessions + // (e.g. `--only claude_code`). Used by the install-time scan to + // honor a "scan your Claude Code sessions" promise β€” without this + // filter, the picker walks every installed agent's sessions and + // could surface an insight from Codex / Cursor / etc when only + // Claude Code was advertised (codex PR #198 P2 finding). + const installs = onlyAgent + ? installsAll.filter(i => i.agent === onlyAgent) + : installsAll; + if (installs.length === 0) { + console.error(`No '${onlyAgent}' session directory detected. Skipping mine-local.`); + process.exit(1); + } + console.log(`Detected installed agents: ${installs.map(i => i.agent).join(", ")}${onlyAgent ? ` (filtered to ${onlyAgent})` : ""}`); const host = detectHostAgent(); const fallback = installs[0].agent; diff --git a/src/skillify/advisor.ts b/src/skillify/advisor.ts new file mode 100644 index 00000000..942b2943 --- /dev/null +++ b/src/skillify/advisor.ts @@ -0,0 +1,230 @@ +/** + * Insight advisor β€” runs after a mine-local pass to pick the BEST + * insight-bearing candidate from a manifest. Calls sonnet via the + * user's claude CLI (haiku is the executor that produces N candidates + * in parallel; sonnet is the advisor that selects one). + * + * Pattern (per Anthropic's "Executor/Advisor"): + * - Executor (haiku, parallel Γ— N): cheap, runs once per session, + * produces 0-3 skill candidates each. No quality floor β€” haiku + * sometimes emits meta-commentary ("user asked to save this rule") + * because the session it saw was a meta-conversation, not coding. + * - Advisor (sonnet, single call): reads ALL candidates, ranks by + * quality criteria, marks the winner with `primary: true` in the + * manifest. If every candidate is meta-noise / vague, returns + * null (no banner surfaces β€” falls back to the count line). + * + * Cost: one sonnet call per mine-local run (~$0.10-0.30 with the + * candidate set sizes we cap at). Worth the spend at install time, + * where surface attention is the scarce resource and we get one shot + * at the impression. + * + * The advisor is intentionally separate from gate-runner so the + * haiku-parallel path stays untouched. We invoke `claude --model sonnet` + * directly with stdin-fed prompt + parse a deterministic single-line + * response. + */ + +import { spawn } from "node:child_process"; +import { existsSync } from "node:fs"; +import { findAgentBin } from "./gate-runner.js"; +import { + LOCAL_MANIFEST_PATH, + readLocalManifest, + writeLocalManifest, + type LocalManifestEntry, +} from "./local-manifest.js"; + +/** + * Hard cap on the advisor call. Sonnet on a small candidate list + * (typically 4-15 entries) returns in ~5-20s. 60s ceiling before we + * give up and leave the manifest unmarked β€” the rule will fall back + * to the recency-based pick. + */ +const ADVISOR_TIMEOUT_MS = 60_000; + +/** + * Maximum candidates we send to the advisor. Mine-local typically + * produces 0-3 candidates per session, capped at ~24 total for the + * default --n 8 run. We send up to 20 β€” beyond that the prompt gets + * long and sonnet's signal-to-noise drops. If a future run yields + * more, we sample the newest 20. + */ +const MAX_CANDIDATES = 20; + +export interface AdvisorResult { + /** The picked entry's skill_name, or null if all candidates were rejected. */ + pickedSkillName: string | null; + /** Sonnet's free-text justification (kept for debug logs / observability). */ + reason: string; + /** Raw stdout from the model β€” preserved for debug only, not parsed beyond pick/reject. */ + rawOutput: string; +} + +function buildAdvisorPrompt(candidates: LocalManifestEntry[]): string { + const lines: string[] = [ + "You are reviewing skill candidates extracted from a user's coding sessions.", + "Pick the ONE candidate whose `insight` field is most useful to show the user as a", + "concrete finding from their past work. Reply on EXACTLY ONE LINE.", + "", + "GOOD insights are:", + " - Concrete and counted (cite specific numbers, file names, durations)", + " - About a real coding mistake or pattern the USER made (in 2nd person β€” \"You did X\")", + " - Actionable: the user can change behavior based on knowing this", + "", + "BAD insights (REJECT these) are:", + " - Meta-commentary about why the skill was saved (\"User explicitly requested...\")", + " - Vague / generic engineering platitudes the user already knows", + " - About someone other than the user (a teammate, a third party)", + " - Hypothetical (\"could lead to...\", \"might cause...\") rather than observed", + "", + "Output format β€” STRICT, one line only:", + " PICK: ", + "OR", + " REJECT_ALL: ", + "", + "Candidates:", + ]; + for (const [i, c] of candidates.entries()) { + lines.push(`${i + 1}. name=${c.skill_name} insight=${JSON.stringify((c.insight ?? "").slice(0, 400))}`); + } + return lines.join("\n"); +} + +/** + * Parse the advisor's single-line verdict. Defensive against extra + * whitespace, fenced code blocks, and prose preludes that sonnet + * sometimes emits despite the "EXACTLY ONE LINE" instruction. + */ +export function parseAdvisorOutput(raw: string, candidates: LocalManifestEntry[]): AdvisorResult { + const cleaned = raw.trim(); + const pickMatch = cleaned.match(/PICK:\s*(\d+)/i); + if (pickMatch) { + const idx = parseInt(pickMatch[1], 10) - 1; + if (idx >= 0 && idx < candidates.length) { + return { + pickedSkillName: candidates[idx].skill_name, + reason: cleaned, + rawOutput: raw, + }; + } + } + const rejectMatch = cleaned.match(/REJECT_ALL:\s*(.+)/i); + if (rejectMatch) { + return { pickedSkillName: null, reason: rejectMatch[1].trim(), rawOutput: raw }; + } + // Unparseable response β€” treat as reject so we don't pick blindly. + return { pickedSkillName: null, reason: `unparseable advisor output: ${cleaned.slice(0, 120)}`, rawOutput: raw }; +} + +/** + * Invoke the user's `claude --model sonnet` CLI with the prompt piped + * to stdin. Mirrors mine-local's stdin-gate trick to avoid MAX_ARG_STRLEN. + */ +function runAdvisorGate(prompt: string, claudeBin: string): Promise { + return new Promise((resolve, reject) => { + const child = spawn(claudeBin, [ + "-p", + "--no-session-persistence", + "--model", "sonnet", + "--permission-mode", "bypassPermissions", + ], { + stdio: ["pipe", "pipe", "pipe"], + env: { ...process.env, HIVEMIND_WIKI_WORKER: "1", HIVEMIND_CAPTURE: "false" }, + }); + let stdout = ""; + let stderr = ""; + let settled = false; + const finish = (err: Error | null, out: string): void => { + if (settled) return; + settled = true; + if (err) reject(err); else resolve(out); + }; + const timer = setTimeout(() => { + try { child.kill("SIGKILL"); } catch { /* best-effort */ } + finish(new Error(`advisor timed out after ${ADVISOR_TIMEOUT_MS}ms`), ""); + }, ADVISOR_TIMEOUT_MS); + child.stdout.on("data", (b: Buffer) => { stdout += b.toString("utf-8"); }); + child.stderr.on("data", (b: Buffer) => { stderr += b.toString("utf-8"); }); + child.on("error", (e: Error) => { clearTimeout(timer); finish(e, ""); }); + child.on("close", (code: number | null) => { + clearTimeout(timer); + if (code !== 0) { + finish(new Error(`advisor CLI exit ${code}; stderr=${stderr.slice(0, 200)}`), ""); + } else { + finish(null, stdout); + } + }); + child.stdin.on("error", (e: Error) => { clearTimeout(timer); finish(e, ""); }); + child.stdin.end(prompt); + }); +} + +/** + * Read the manifest, run the advisor over its insight-bearing entries, + * mark the picked entry with `primary: true`, write back. Returns the + * AdvisorResult (or null when there's nothing to advise on). + * + * Idempotent on the manifest schema: previously-marked primary entries + * are cleared before the new pick is set, so a re-run replaces the + * marking instead of accumulating. + */ +export async function runAdvisor( + manifestPath: string = LOCAL_MANIFEST_PATH, +): Promise { + const m = readLocalManifest(manifestPath); + if (!m || !Array.isArray(m.entries)) return null; + const insightBearing = m.entries.filter( + e => e && typeof e.insight === "string" && e.insight.trim().length > 0, + ); + if (insightBearing.length === 0) return null; + // No advisor call needed when there's exactly one candidate β€” it's + // automatically the best one. Save the sonnet spend. + if (insightBearing.length === 1) { + insightBearing[0].primary = true; + writeLocalManifest(m, manifestPath); + return { + pickedSkillName: insightBearing[0].skill_name, + reason: "trivial pick (single candidate)", + rawOutput: "", + }; + } + const claudeBin = findAgentBin("claude_code"); + if (!claudeBin || !existsSync(claudeBin)) { + // Without a claude CLI we can't call sonnet. Leave the manifest as + // is β€” the existing recency-based pick still works. + return null; + } + // Cap at MAX_CANDIDATES newest-first so the prompt stays bounded. + const ranked = [...insightBearing] + .sort((a, b) => (b.created_at ?? "").localeCompare(a.created_at ?? "")) + .slice(0, MAX_CANDIDATES); + const prompt = buildAdvisorPrompt(ranked); + let raw: string; + try { + raw = await runAdvisorGate(prompt, claudeBin); + } catch (err) { + return { + pickedSkillName: null, + reason: `advisor invocation failed: ${(err as Error).message}`, + rawOutput: "", + }; + } + const result = parseAdvisorOutput(raw, ranked); + if (result.pickedSkillName) { + // Clear any prior primary flags, then mark the picked entry. Same + // skill_name match β€” we mutate the entry in the original manifest + // (not a copy) so the write-back persists. + for (const e of m.entries) { + if (e && e.primary === true) delete e.primary; + } + for (const e of m.entries) { + if (e && e.skill_name === result.pickedSkillName) { + e.primary = true; + break; + } + } + writeLocalManifest(m, manifestPath); + } + return result; +} diff --git a/src/skillify/local-manifest.ts b/src/skillify/local-manifest.ts index c05d67ff..339d9aab 100644 --- a/src/skillify/local-manifest.ts +++ b/src/skillify/local-manifest.ts @@ -45,6 +45,14 @@ export interface LocalManifestEntry { * and fall back to the count-only banner. */ insight?: string; + /** + * Set to true by the advisor pass when this entry is the chosen "best" + * insight to surface. The advisor (sonnet) ranks all insight-bearing + * entries from a mining run by quality (concrete, quantified, non-meta) + * and marks the winner. `getLatestInsightEntry` prefers primary entries + * over the recency tiebreak when present. + */ + primary?: boolean; } export interface LocalManifest { @@ -132,20 +140,27 @@ export function getLatestInsightEntry( if (Number.isFinite(ts) && ts > newestTs) newestTs = ts; } if (!Number.isFinite(newestTs)) return null; - // Second pass: pick the newest insight-bearing entry within the - // latest-run window. Date.parse handles timezone-offset variants; - // unparseable created_at rows are skipped so a single malformed - // entry can't shadow valid ones. + // Second pass: pick the best insight-bearing entry within the + // latest-run window. Preference order: + // 1. `primary: true` entries (advisor marked these as the best + // among the run's candidates by quality criteria) + // 2. otherwise, the newest by created_at + // Date.parse handles timezone-offset variants; unparseable created_at + // rows are skipped so a single malformed entry can't shadow valid ones. let best: LocalManifestEntry | null = null; let bestTs = Number.NEGATIVE_INFINITY; + let bestIsPrimary = false; for (const e of m.entries) { if (!e || typeof e.insight !== "string" || e.insight.trim().length === 0) continue; const ts = Date.parse(e.created_at ?? ""); if (!Number.isFinite(ts)) continue; if (newestTs - ts > LATEST_RUN_WINDOW_MS) continue; - if (ts > bestTs) { + const isPrimary = e.primary === true; + // Primary entries always beat non-primary; among same-class, newer wins. + if (!best || (isPrimary && !bestIsPrimary) || (isPrimary === bestIsPrimary && ts > bestTs)) { best = e; bestTs = ts; + bestIsPrimary = isPrimary; } } return best; diff --git a/tests/claude-code/advisor.test.ts b/tests/claude-code/advisor.test.ts new file mode 100644 index 00000000..e035bb1e --- /dev/null +++ b/tests/claude-code/advisor.test.ts @@ -0,0 +1,268 @@ +/** + * Unit tests for src/skillify/advisor.ts β€” the sonnet-based ranking + * pass that picks the best insight-bearing candidate from a mine-local + * manifest. Mocks the claude CLI spawn so tests stay fast and + * deterministic. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { EventEmitter } from "node:events"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { PassThrough } from "node:stream"; + +type SpawnCall = { cmd: string; args: string[]; stdin: string }; +const spawnCalls: SpawnCall[] = []; +let nextChildBehavior: { + stdout?: string; + exitCode?: number; + emitError?: Error; +} = { stdout: "", exitCode: 0 }; + +vi.mock("node:child_process", () => ({ + spawn: vi.fn((cmd: string, args: string[]) => { + const child = new EventEmitter() as any; + child.stdout = new PassThrough(); + child.stderr = new PassThrough(); + child.stdin = new PassThrough(); + child.kill = vi.fn(); + let stdinBuf = ""; + child.stdin.on("data", (b: Buffer) => { stdinBuf += b.toString("utf-8"); }); + child.stdin.on("finish", () => { + spawnCalls.push({ cmd, args, stdin: stdinBuf }); + const beh = nextChildBehavior; + queueMicrotask(() => { + if (beh.emitError) { + child.emit("error", beh.emitError); + return; + } + if (beh.stdout) child.stdout.write(beh.stdout); + child.stdout.end(); + child.stderr.end(); + child.emit("close", beh.exitCode ?? 0); + }); + }); + return child; + }), +})); + +let findAgentBinReturn: string | null = null; // set in beforeEach after TMP_HOME is computed +vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + findAgentBin: () => findAgentBinReturn, + }; +}); + +import { parseAdvisorOutput, runAdvisor } from "../../src/skillify/advisor.js"; +import { readLocalManifest, writeLocalManifest, type LocalManifestEntry } from "../../src/skillify/local-manifest.js"; + +const TMP_HOME = mkdtempSync(join(tmpdir(), "advisor-test-")); +// Per-suite fake-bin path under TMP_HOME so parallel test files can't +// race on a shared /tmp/fake-claude-bin (coderabbit on PR #198). +const FAKE_BIN = join(TMP_HOME, "fake-claude-bin"); +const MANIFEST = join(TMP_HOME, "manifest.json"); +const writeM = (m: import("../../src/skillify/local-manifest.js").LocalManifest) => writeLocalManifest(m, MANIFEST); + +beforeEach(() => { + spawnCalls.length = 0; + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + findAgentBinReturn = FAKE_BIN; + rmSync(TMP_HOME, { recursive: true, force: true }); + mkdirSync(TMP_HOME, { recursive: true }); + writeFileSync(FAKE_BIN, "// fake claude", "utf-8"); +}); + +afterEach(() => { + try { rmSync(FAKE_BIN); } catch { /* best-effort */ } +}); + +function makeEntry(over: Partial): LocalManifestEntry { + return { + skill_name: "k", + canonical_path: "/x/SKILL.md", + symlinks: [], + source_session_ids: ["sid"], + source_session_paths: ["/x/sid.jsonl"], + source_agent: "claude_code", + gate_agent: "claude_code", + created_at: "2026-05-22T00:00:00.000Z", + uploaded: false, + ...over, + }; +} + +describe("parseAdvisorOutput", () => { + const candidates = [ + makeEntry({ skill_name: "alpha", insight: "a" }), + makeEntry({ skill_name: "beta", insight: "b" }), + makeEntry({ skill_name: "gamma", insight: "c" }), + ]; + + it("parses PICK: at the start of the response", () => { + const r = parseAdvisorOutput("PICK: 2", candidates); + expect(r.pickedSkillName).toBe("beta"); + }); + + it("parses PICK with surrounding whitespace and case", () => { + const r = parseAdvisorOutput("\n pick: 3 \n", candidates); + expect(r.pickedSkillName).toBe("gamma"); + }); + + it("parses REJECT_ALL with a reason", () => { + const r = parseAdvisorOutput("REJECT_ALL: every candidate is meta-noise", candidates); + expect(r.pickedSkillName).toBeNull(); + expect(r.reason).toContain("meta-noise"); + }); + + it("returns null when PICK index is out of range", () => { + const r = parseAdvisorOutput("PICK: 99", candidates); + expect(r.pickedSkillName).toBeNull(); + }); + + it("returns null on unparseable output (defensive: never blind-pick)", () => { + const r = parseAdvisorOutput("Hmm, this is hard to say. Maybe beta?", candidates); + expect(r.pickedSkillName).toBeNull(); + expect(r.reason).toContain("unparseable"); + }); + + it("tolerates prose preamble and picks PICK pattern embedded later in stdout", () => { + // Sonnet sometimes adds 'Looking at the candidates...' prose before the verdict. + const r = parseAdvisorOutput("Looking at the candidates… PICK: 1", candidates); + expect(r.pickedSkillName).toBe("alpha"); + }); +}); + +describe("runAdvisor", () => { + it("returns null when no manifest exists", async () => { + const result = await runAdvisor(MANIFEST); + expect(result).toBeNull(); + // No spawn should fire β€” no candidates to advise on. + expect(spawnCalls).toHaveLength(0); + }); + + it("returns null when manifest has no insight-bearing entries", async () => { + writeM({ + created_at: "x", + entries: [makeEntry({ skill_name: "a" }), makeEntry({ skill_name: "b" })], + }); + const result = await runAdvisor(MANIFEST); + expect(result).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("trivial-picks when there's exactly one insight candidate (no sonnet call)", async () => { + // Single candidate is automatically the winner β€” save the spend. + const only = makeEntry({ skill_name: "lone", insight: "a real insight" }); + writeM({ created_at: "x", entries: [only] }); + const result = await runAdvisor(MANIFEST); + expect(result?.pickedSkillName).toBe("lone"); + expect(result?.reason).toContain("trivial pick"); + // Manifest must now have primary: true marked. + const re = readLocalManifest(MANIFEST); + expect(re?.entries[0].primary).toBe(true); + expect(spawnCalls).toHaveLength(0); + }); + + it("invokes claude with --model sonnet on the spawned process", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "insight a" }), + makeEntry({ skill_name: "b", insight: "insight b" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + await runAdvisor(MANIFEST); + expect(spawnCalls).toHaveLength(1); + expect(spawnCalls[0].cmd).toBe(FAKE_BIN); + const args = spawnCalls[0].args; + expect(args).toContain("--model"); + expect(args[args.indexOf("--model") + 1]).toBe("sonnet"); + // Prompt fed via stdin includes both candidates. + expect(spawnCalls[0].stdin).toContain("insight a"); + expect(spawnCalls[0].stdin).toContain("insight b"); + }); + + it("marks the picked entry as primary in the manifest", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + makeEntry({ skill_name: "c", insight: "c" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 2", exitCode: 0 }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeDefined(); + const re = readLocalManifest(MANIFEST)!; + // newest-first ordering inside advisor β†’ candidate index in prompt + // maps to ranked order, NOT manifest order. We just verify SOME + // entry was marked. + const primaries = re.entries.filter(e => e.primary === true); + expect(primaries).toHaveLength(1); + }); + + it("clears prior primary markings before applying the new pick", async () => { + // Idempotency: re-running the advisor should replace the prior + // primary, not add a second one. + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "old-winner", insight: "old", primary: true }), + makeEntry({ skill_name: "new-winner", insight: "new" }), + ], + }); + nextChildBehavior = { stdout: "PICK: 1", exitCode: 0 }; + await runAdvisor(MANIFEST); + const re = readLocalManifest(MANIFEST)!; + const primaries = re.entries.filter(e => e.primary === true); + expect(primaries).toHaveLength(1); + }); + + it("leaves manifest untouched when advisor REJECT_ALLs", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "meta noise" }), + makeEntry({ skill_name: "b", insight: "more meta noise" }), + ], + }); + nextChildBehavior = { stdout: "REJECT_ALL: all candidates are meta-noise", exitCode: 0 }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeNull(); + const re = readLocalManifest(MANIFEST)!; + expect(re.entries.filter(e => e.primary === true)).toHaveLength(0); + }); + + it("returns null cleanly when no claude CLI is available (fall back to recency pick)", async () => { + findAgentBinReturn = null; + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + ], + }); + const r = await runAdvisor(MANIFEST); + expect(r).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("returns an error result (not throw) when the spawn fails β€” caller falls through", async () => { + writeM({ + created_at: "x", + entries: [ + makeEntry({ skill_name: "a", insight: "a" }), + makeEntry({ skill_name: "b", insight: "b" }), + ], + }); + nextChildBehavior = { emitError: new Error("ENOENT") }; + const r = await runAdvisor(MANIFEST); + expect(r?.pickedSkillName).toBeNull(); + expect(r?.reason).toContain("advisor invocation failed"); + }); +}); diff --git a/tests/claude-code/install-scan.test.ts b/tests/claude-code/install-scan.test.ts new file mode 100644 index 00000000..0ab1cde6 --- /dev/null +++ b/tests/claude-code/install-scan.test.ts @@ -0,0 +1,436 @@ +/** + * Unit tests for src/cli/install-scan.ts β€” the install-time value-show + * scan helpers. The scan itself spawns mine-local, which we mock at + * the child_process boundary so tests stay fast and deterministic. + * + * Coverage targets: canOfferInstallScan guard chain (every false-path, + * plus the all-conditions-met true-path); runInstallScan timeout + + * happy path + manifest read; formatScanResult rendering invariants. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { EventEmitter } from "node:events"; +import { existsSync as existsSyncReal, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir, homedir } from "node:os"; +import { join } from "node:path"; + +// child_process.spawn is mocked so the "scan" test doesn't actually +// invoke node β€” we drive child events from inside the test. +type SpawnCall = { cmd: string; args: string[] }; +const spawnCalls: SpawnCall[] = []; +let nextChildBehavior: { + exitCode?: number; + emitError?: Error; + delayMs?: number; +} = { exitCode: 0 }; + +vi.mock("node:child_process", () => ({ + spawn: vi.fn((cmd: string, args: string[]) => { + spawnCalls.push({ cmd, args }); + const child = new EventEmitter() as any; + child.kill = vi.fn(); + const behavior = nextChildBehavior; + queueMicrotask(() => { + const delay = behavior.delayMs ?? 0; + const fire = () => { + if (behavior.emitError) { + child.emit("error", behavior.emitError); + } else { + child.emit("close", behavior.exitCode ?? 0); + } + }; + if (delay > 0) setTimeout(fire, delay); + else fire(); + }); + return child; + }), +})); + +// findAgentBin returns a path that may or may not exist on disk β€” we +// drive it directly so tests don't depend on the developer's PATH. +let findAgentBinReturn: string | null = null; // set in beforeEach +vi.mock("../../src/skillify/gate-runner.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + findAgentBin: (..._a: unknown[]) => findAgentBinReturn, + }; +}); + +// getLatestInsightEntry + countLocalManifestEntries are mocked so the +// runInstallScan path doesn't read the developer's real +// ~/.claude/hivemind/local-mined.json. Both accessors capture +// LOCAL_MANIFEST_PATH at module-load (homedir() at import time), so +// without these mocks tests would observe whatever is in the +// developer's real manifest. +let nextInsightEntry: any = null; +let nextSkillsCount = 0; +vi.mock("../../src/skillify/local-manifest.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + getLatestInsightEntry: () => nextInsightEntry, + countLocalManifestEntries: () => nextSkillsCount, + // canOfferInstallScan reads LOCAL_MANIFEST_PATH existence directly; + // we keep the real export so tests can choose a tmp manifest path + // by setting HOME via process.env.HOME below. + }; +}); + +// runAdvisor is mocked so install-scan tests stay focused on install-scan +// behavior. The advisor itself is tested in advisor.test.ts. Without this +// mock, every install-scan test would also exercise the advisor's spawn, +// inflating spawnCalls and breaking length assertions. +let nextAdvisorResult: any = null; +vi.mock("../../src/skillify/advisor.js", () => ({ + runAdvisor: vi.fn(async () => nextAdvisorResult), +})); + +import { + canOfferInstallScan, + formatScanResult, + runInstallScan, +} from "../../src/cli/install-scan.js"; + +const TMP_HOME = mkdtempSync(join(tmpdir(), "install-scan-test-")); +const originalHome = process.env.HOME; +const originalArgv1 = process.argv[1]; +const FAKE_CLI = join(TMP_HOME, "fake-cli.js"); +// Per-suite fake-bin path under TMP_HOME so parallel test files can't +// race on a shared /tmp/fake-claude-bin (coderabbit on PR #198). +const FAKE_BIN = join(TMP_HOME, "fake-claude-bin"); + +beforeEach(() => { + // Reset state between tests. + spawnCalls.length = 0; + nextChildBehavior = { exitCode: 0 }; + findAgentBinReturn = FAKE_BIN; + nextInsightEntry = null; + nextSkillsCount = 0; + nextAdvisorResult = null; + // Each test starts with a clean tmp HOME: no sessions, no manifest. + rmSync(TMP_HOME, { recursive: true, force: true }); + mkdirSync(TMP_HOME, { recursive: true }); + process.env.HOME = TMP_HOME; + // process.argv[1] is what runInstallScan spawns. Point it at a real + // file so the existsSync check passes; the actual content doesn't + // matter because spawn is mocked. + writeFileSync(FAKE_CLI, "// fake cli", "utf-8"); + process.argv[1] = FAKE_CLI; + // Also ensure the mocked "claude bin" exists on disk for the guard. + writeFileSync(FAKE_BIN, "// fake claude", "utf-8"); +}); + +afterEach(() => { + process.env.HOME = originalHome; + process.argv[1] = originalArgv1; + try { rmSync(FAKE_BIN); } catch { /* best-effort */ } +}); + +describe("canOfferInstallScan", () => { + function seedSession(): void { + const projectsDir = join(TMP_HOME, ".claude", "projects", "sample-proj"); + mkdirSync(projectsDir, { recursive: true }); + writeFileSync(join(projectsDir, "abc.jsonl"), "{}\n", "utf-8"); + } + + it("returns false when no claude CLI is present (gate runner needs it)", () => { + findAgentBinReturn = null; + seedSession(); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when findAgentBin returns a path that doesn't exist", () => { + findAgentBinReturn = "/nonexistent/claude"; + seedSession(); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when ~/.claude/projects is missing (truly fresh claude install)", () => { + // No sessions seeded β†’ cold-install path β†’ no scan offer. + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when ~/.claude/projects exists but contains no .jsonl files", () => { + // Subdir exists but is empty β€” the recursive scan finds nothing + // and we don't waste user attention on a doomed offer. + mkdirSync(join(TMP_HOME, ".claude", "projects", "empty"), { recursive: true }); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns false when the mine-local manifest already exists (re-installer)", () => { + seedSession(); + const manifestDir = join(TMP_HOME, ".claude", "hivemind"); + mkdirSync(manifestDir, { recursive: true }); + writeFileSync(join(manifestDir, "local-mined.json"), "{}\n", "utf-8"); + expect(canOfferInstallScan()).toBe(false); + }); + + it("returns true when all guards pass: claude CLI + sessions + no manifest", () => { + seedSession(); + expect(canOfferInstallScan()).toBe(true); + }); +}); + +describe("runInstallScan", () => { + it("spawns `skillify mine-local --n 10 --only claude_code` against the same CLI bundle the install ran from", async () => { + nextChildBehavior = { exitCode: 0 }; + await runInstallScan(); + expect(spawnCalls).toHaveLength(1); + const { cmd, args } = spawnCalls[0]; + expect(cmd).toBe(process.execPath); + // Always spawns OUR cli bundle, never `which hivemind`, so the + // worker is the same version as the parent install process. + expect(args[0]).toBe(FAKE_CLI); + expect(args).toContain("skillify"); + expect(args).toContain("mine-local"); + // `--n 10` is the install-time session cap. Tuned across + // iterations (3 β†’ 5 β†’ 20 β†’ 10) with real-data latency + quality + // measurements: 10 is the sweet spot β€” concurrency=4 caps + // parallelism so the curve flattens past Nβ‰ˆ10, and advisor pick + // quality at N=10 matches N=20 in practice. + expect(args).toContain("--n"); + expect(args[args.indexOf("--n") + 1]).toBe("10"); + // `--only claude_code` honors the "scan your Claude Code sessions" + // copy β€” without it, mine-local would walk every installed agent + // and could surface an insight from Codex / Cursor (codex PR #198 + // P2). Regression guard. + expect(args).toContain("--only"); + expect(args[args.indexOf("--only") + 1]).toBe("claude_code"); + }); + + it("deletes the manifest when mine-local wrote a literal empty sentinel (entries: [])", async () => { + // codex PR #198 P1: mine-local writes a sentinel manifest even + // when 0 SKILLS were found. That sentinel permanently disables + // future maybeAutoMineLocal() runs. We unlink it so background + // mining can retry as history accumulates. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ created_at: "x", entries: [] })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + // Manifest is gone β€” background auto-mine can retry. + expect(existsSyncReal(manifestPath)).toBe(false); + }); + + it("suppresses insight when advisor REJECT_ALLs even if a recency-pick would otherwise exist (codex P2)", async () => { + // Regression guard for codex's third-round P2: if sonnet returns + // REJECT_ALL, runAdvisor's pickedSkillName is null. Without the + // suppression check, runInstallScan would still fall through to + // getLatestInsightEntry() and surface the exact candidate the + // advisor just rejected β€” defeating the advisor pass entirely. + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { + skill_name: "rejected-meta-noise", + insight: "User explicitly requested this rule be saved.", + created_at: "2026-05-22T00:00:00.000Z", + }; + nextSkillsCount = 3; + nextAdvisorResult = { pickedSkillName: null, reason: "REJECT_ALL: all meta-noise", rawOutput: "" }; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + // Skills count still surfaces β€” the caller can show "mined N skills" + // copy instead of either the rejected insight or "no patterns found." + expect(result.skillsCount).toBe(3); + }); + + it("uses the recency pick when advisor returns null (no candidates / no CLI / no manifest)", async () => { + // Distinguishes the REJECT_ALL case (advisor evaluated and said no) + // from the no-opinion case (advisor didn't run). When advisor itself + // returned null, we trust the recency tiebreak. + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { + skill_name: "concrete-pattern", + insight: "You hit X twice last week.", + created_at: "2026-05-22T00:00:00.000Z", + }; + nextSkillsCount = 1; + nextAdvisorResult = null; // advisor didn't run at all + const result = await runInstallScan(); + expect(result.insight).not.toBeNull(); + expect(result.insight!.skill_name).toBe("concrete-pattern"); + }); + + it("returns skillsCount > 0 when mine-local wrote skills but no insight surfaced (codex P3)", async () => { + // Regression guard for codex P3: caller used to see `null` from + // runInstallScan and print "No repeatable patterns found" even + // when skills WERE mined. The new shape carries skillsCount so + // the caller can distinguish "scan failed" from "skills mined + // but no banner-quality insight." + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [ + { skill_name: "a", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:00.000Z", uploaded: false }, + { skill_name: "b", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:01.000Z", uploaded: false }, + ], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; + nextSkillsCount = 2; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + expect(result.skillsCount).toBe(2); + }); + + it("unlinks a corrupt/truncated manifest left behind on timeout (codex P2)", async () => { + // Regression guard for codex P2: a timeout/crash mid-write can + // leave the manifest unparseable. Both canOfferInstallScan and + // maybeAutoMineLocal treat presence as "already mined", so a + // corrupt sentinel would permanently disable retries. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, "{ truncated json β€” not closed"); + nextChildBehavior = { exitCode: 1 }; // simulate child crash + nextInsightEntry = null; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + // Corrupt manifest unlinked β€” future retries can fire. + expect(existsSyncReal(manifestPath)).toBe(false); + }); + + it("PRESERVES the manifest when mine-local wrote skills but none have an insight (codex P2)", async () => { + // codex PR #198 P2: the prior fix was over-aggressive. When + // mine-local writes skills (entries.length > 0) but the gate + // omits `insight` on every one of them, we MUST NOT delete the + // manifest β€” countLocalManifestEntries() surfaces the count via + // the legacy SessionStart banner branch, and a future + // `push-local` needs the row metadata. Only the literal + // `entries: []` sentinel should be cleared. + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [ + { skill_name: "a", canonical_path: "/x", symlinks: [], source_session_ids: [], source_session_paths: [], source_agent: "claude_code", gate_agent: "claude_code", created_at: "2026-05-22T00:00:00.000Z", uploaded: false }, + ], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; // no insight produced, but skills exist + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + expect(existsSyncReal(manifestPath)).toBe(true); + }); + + it("preserves the manifest when an insight WAS produced", async () => { + const manifestPath = join(TMP_HOME, ".claude", "hivemind", "local-mined.json"); + mkdirSync(join(TMP_HOME, ".claude", "hivemind"), { recursive: true }); + writeFileSync(manifestPath, JSON.stringify({ + created_at: "x", + entries: [{ skill_name: "k", insight: "i", created_at: "z" }], + })); + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = { skill_name: "k", insight: "i", created_at: "z" }; + const result = await runInstallScan(); + expect(result.insight).not.toBeNull(); + expect(existsSyncReal(manifestPath)).toBe(true); + }); + + it("resolves with the latest insight entry on clean exit when one exists", async () => { + nextInsightEntry = { + skill_name: "verify-before-done", + insight: "You revisited 4 merged PRs in the last month.", + created_at: "2026-05-22T10:00:00.000Z", + }; + nextChildBehavior = { exitCode: 0 }; + const result = await runInstallScan(); + expect(result.insight).not.toBeNull(); + expect(result.insight!.skill_name).toBe("verify-before-done"); + }); + + it("resolves with null on non-zero exit", async () => { + nextChildBehavior = { exitCode: 1 }; + nextInsightEntry = { skill_name: "x", insight: "y", created_at: "z" }; + // Even if a manifest exists, a failed mine-local run shouldn't + // surface its (possibly stale) output. We treat non-zero exit as + // "scan failed β†’ fall through silently". + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + }); + + it("resolves with null on spawn error", async () => { + nextChildBehavior = { emitError: new Error("ENOENT") }; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + }); + + it("resolves with null when process.argv[1] points at a missing file (safety guard)", async () => { + process.argv[1] = "/nonexistent/cli.js"; + const result = await runInstallScan(); + // Spawn should NOT have been called β€” we bail out at the + // existsSync check rather than letting node fail mid-spawn. + expect(result.insight).toBeNull(); + expect(spawnCalls).toHaveLength(0); + }); + + it("resolves with null when getLatestInsightEntry returns null (gate produced no insight)", async () => { + nextChildBehavior = { exitCode: 0 }; + nextInsightEntry = null; + const result = await runInstallScan(); + expect(result.insight).toBeNull(); + }); +}); + +describe("formatScanResult", () => { + function makeEntry(insight: string, name = "verify-before-done"): any { + return { + skill_name: name, + insight, + created_at: "2026-05-22T00:00:00.000Z", + }; + } + + it("renders insight, skill name, and emoji markers", () => { + const out = formatScanResult(makeEntry("You revisited 4 merged PRs.")); + expect(out).toContain("Found a pattern in your past sessions"); + expect(out).toContain("πŸ“Œ You revisited 4 merged PRs."); + expect(out).toContain("✨ Skill `verify-before-done` ready"); + }); + + it("collapses embedded whitespace (newlines, tabs) to single spaces", () => { + // Defense-in-depth: parseMultiVerdict already normalizes whitespace + // before persistence, but this renderer is the last guard before + // user-visible output and must not blindly trust the input. + const out = formatScanResult(makeEntry("Line one.\nLine\ttwo. Three.")); + expect(out).toContain("πŸ“Œ Line one. Line two. Three."); + expect(out).not.toContain("\nLine two"); + expect(out).not.toContain("\t"); + }); + + it("truncates insight over 280 chars at a word boundary with ellipsis", () => { + // 280-char cap matches the parseMultiVerdict storage cap, so the + // renderer never truncates beyond what the manifest can store. + // Bumped from 200 β†’ 280 after a real-world test caught a haiku + // insight mid-sentence at 200; 280 gives haiku room to finish a + // single sentence with the punchline intact. + const long = "x ".repeat(280).trim() + " end-marker"; + const out = formatScanResult(makeEntry(long)); + const insightLine = out.split("\n").find(l => l.includes("πŸ“Œ"))!; + // Allow ~10 chars of slack for the emoji + leading spaces. + expect(insightLine.length).toBeLessThanOrEqual(295); + expect(insightLine.endsWith("…")).toBe(true); + expect(insightLine).not.toContain("end-marker"); + }); + + it("passes through short insights without truncation", () => { + const out = formatScanResult(makeEntry("Short and concrete.")); + expect(out).toContain("Short and concrete."); + expect(out).not.toContain("…"); + }); + + it("handles missing insight gracefully (empty string, not crash)", () => { + // Should never happen in practice β€” caller checks for non-empty + // insight before calling formatScanResult β€” but defensive coding + // means a malformed entry doesn't crash the install. + const out = formatScanResult({ + skill_name: "x", + insight: undefined as any, + created_at: "z", + } as any); + expect(out).toContain("πŸ“Œ"); + expect(out).toContain("✨ Skill `x`"); + }); +}); diff --git a/tests/claude-code/mine-local-orchestrator.test.ts b/tests/claude-code/mine-local-orchestrator.test.ts index 7844adaf..2a181ca8 100644 --- a/tests/claude-code/mine-local-orchestrator.test.ts +++ b/tests/claude-code/mine-local-orchestrator.test.ts @@ -452,6 +452,39 @@ describe("runMineLocal: orchestrator branches", () => { expect(errSpy).toHaveBeenCalledWith(expect.stringContaining("mine-local v1 requires the Claude Code CLI")); }); + it("--only filters detectInstalledAgents to the requested agent", async () => { + // codex PR #198 P2 regression guard: the install-time scan + // advertises "scan your Claude Code sessions" but mine-local + // walks every installed agent by default. The --only flag scopes + // the picker so the prompt copy is honest. + const old = Date.now() - 5 * 60_000; + detectInstalledAgents.mockReturnValueOnce([ + { agent: "codex", sessionRoot: "/c", encodeCwd: () => "x" }, + { agent: "claude_code", sessionRoot: "/cc", encodeCwd: () => "x" }, + ]); + // listLocalSessions should be called with ONLY the claude_code + // install when --only claude_code is passed. + listLocalSessions.mockImplementationOnce((installs: any) => { + expect(installs).toHaveLength(1); + expect(installs[0].agent).toBe("claude_code"); + return [makeSession("a", old, "claude_code")]; + }); + const mod = await importOrch(); + await mod.runMineLocal(["--only", "claude_code", "--dry-run"]); + // Confirm the log mentions the filter so users can see what + // happened. + expect(logSpy).toHaveBeenCalledWith(expect.stringContaining("filtered to claude_code")); + }); + + it("--only with no matching install β†’ exit 1 with clear error", async () => { + detectInstalledAgents.mockReturnValueOnce([ + { agent: "codex", sessionRoot: "/c", encodeCwd: () => "x" }, + ]); + const mod = await importOrch(); + await expect(mod.runMineLocal(["--only", "claude_code"])).rejects.toThrow("__exit_1__"); + expect(errSpy).toHaveBeenCalledWith(expect.stringContaining("No 'claude_code' session directory detected")); + }); + it("host = codex but claude_code installed β†’ mining uses claude_code as gate", async () => { const old = Date.now() - 5 * 60_000; detectHostAgent.mockReturnValueOnce("codex");