diff --git a/.gitignore b/.gitignore
index a0b7da6d..d6754c7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-node_modules/
+node_modules
 dist/
 tmp/
 *.js.map
diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts
index af944953..0e63390b 100644
--- a/src/hooks/session-start.ts
+++ b/src/hooks/session-start.ts
@@ -14,7 +14,6 @@ import { loadConfig } from "../config.js";
 import { DeeplakeApi } from "../deeplake-api.js";
 import { sqlStr } from "../utils/sql.js";
 import { projectNameFromCwd } from "../utils/project-name.js";
-import { listActiveOrgSkills, sessionBucket, buildSkillsActiveInsert, buildSkillsActivePath, skillRootsForCwd } from "../skillify/skills-active.js";
 import { readStdin } from "../utils/stdin.js";
 import { log as _log } from "../utils/debug.js";
 import { getInstalledVersion } from "../utils/version-check.js";
@@ -223,11 +222,6 @@ async function main(): Promise<void> {
   // freezes SessionStart. Hard opt-out via HIVEMIND_AUTOPULL_DISABLED=1.
   // All failures swallowed inside autoPullSkills (documented as
   // never-rejecting), so no try/catch needed here.
-  //
-  // Runs BEFORE the skill-attribution snapshot below so that a skill pulled
-  // (or upgraded) during THIS SessionStart is reflected in the recorded
-  // skills_active set — otherwise the row would capture a stale/empty set
-  // while the session can already use the freshly-pulled skill.
   const pullResult = await autoPullSkills();
   log(`autopull: pulled=${pullResult.pulled} skipped=${pullResult.skipped}`);
 
@@ -244,42 +238,6 @@ async function main(): Promise<void> {
           await api.ensureSessionsTable(sessionsTable);
           await createPlaceholder(api, table, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId, pluginVersion);
           log("placeholder created");
-
-          // Skill attribution (measurement): record which org-shared skills were in
-          // context this session + a deterministic A/B bucket. This is the label that
-          // makes skill value measurable (sessions with vs without skill X / v1 vs v2).
-          // Org skills are identified via the pull manifest (authoritative), not the
-          // `--` dirname pattern. Snapshot runs after auto-pull (above) so it reflects
-          // freshly-pulled skills. Opt-out: HIVEMIND_SKILL_ATTRIBUTION=0.
-          // Swallowed — must never fail SessionStart.
-          if (process.env.HIVEMIND_SKILL_ATTRIBUTION !== "0") {
-            try {
-              // Scan global + project-scoped (<cwd>/.claude/skills) roots so
-              // skills pulled with `--to project` are attributed too.
-              const skills = listActiveOrgSkills(skillRootsForCwd(input.cwd));
-              // Distinct `/skills_active/` namespace (NOT `/sessions/`) so the summary /
-              // raw-transcript readers never mistake this attribution row for a transcript.
-              const attrSessionPath = buildSkillsActivePath(config, input.session_id);
-              const attrFilename = attrSessionPath.slice(attrSessionPath.lastIndexOf("/") + 1);
-              const sql = buildSkillsActiveInsert({
-                sessionsTable,
-                sessionPath: attrSessionPath,
-                filename: attrFilename,
-                userName: config.userName,
-                projectName: projectNameFromCwd(input.cwd),
-                pluginVersion,
-                sessionId: input.session_id,
-                cwd: input.cwd,
-                skills,
-                bucket: sessionBucket(input.session_id),
-                ts: new Date().toISOString(),
-              });
-              await api.query(sql);
-              log(`skills_active recorded: ${skills.length} org skills, bucket ${sessionBucket(input.session_id)}`);
-            } catch (e: any) {
-              log(`skills_active attribution failed (swallowed): ${e?.message ?? e}`);
-            }
-          }
         } else {
           const reason = process.env.HIVEMIND_CAPTURE === "false"
             ? "HIVEMIND_CAPTURE=false"
diff --git a/src/skillify/claude-model.ts b/src/skillify/claude-model.ts
new file mode 100644
index 00000000..34ea08e6
--- /dev/null
+++ b/src/skillify/claude-model.ts
@@ -0,0 +1,53 @@
+/**
+ * Shared `claude -p` backend for the engine's LLM steps (success-judge, proposer).
+ * All tools denied → pure-text generation. Runs on the USER's own agent, so cost
+ * lands on the user. Returned as an injectable ModelCall so every LLM step is
+ * unit-testable with zero real calls.
+ */
+import { spawn } from "node:child_process";
+import { findAgentBin } from "./gate-runner.js";
+
+/** (systemPrompt, userPrompt) -> raw model text. */
+export type ModelCall = (systemPrompt: string, userPrompt: string) => Promise<string>;
+
+export function claudeModel(model: string, opts: { timeoutMs?: number } = {}): ModelCall {
+  const timeoutMs = opts.timeoutMs ?? 120_000;
+  return (system, user) => new Promise<string>((resolve, reject) => {
+    const args = [
+      "-p", user, "--model", model, "--no-session-persistence",
+      "--output-format", "json", "--system-prompt", system,
+      // Empty allow-list = NO tools available. Authoritative: it covers built-ins AND
+      // any MCP/configured tools (a deny-list can't enumerate those), so prompt-injected
+      // transcript text in the judge/proposer prompt can never trigger tool use.
+      "--tools", "",
+      // --strict-mcp-config ignores the user's MCP config entirely (--tools only denies
+      // USE, not LOADING) — a broken/oversized user MCP schema would otherwise fail every
+      // judge/proposer call before it returns JSON, silently stopping proposals.
+      "--strict-mcp-config",
+    ];
+    // HIVEMIND_CAPTURE=false so these calls aren't captured as real sessions, AND
+    // HIVEMIND_WIKI_WORKER=1 so the spawned claude -p skips this package's SessionStart
+    // hook entirely (no Deeplake-context injection into the prompt, no auto-pull/graph
+    // work) — one child per anchored invocation would otherwise contaminate the judge
+    // prompt and pile up background work. Same guard the other internal runners use.
+    // Resolve the claude binary the same way the rest of skillify does — a detached
+    // hook worker may not have it on PATH (e.g. ~/.claude/local/claude), and a bare
+    // "claude" would ENOENT and the callers would swallow it as no-change.
+    const child = spawn(findAgentBin("claude_code"), args, {
+      stdio: ["ignore", "pipe", "pipe"],
+      env: { ...process.env, HIVEMIND_CAPTURE: "false", HIVEMIND_WIKI_WORKER: "1" },
+    });
+    let out = "";
+    let err = "";
+    const timer = setTimeout(() => { child.kill("SIGKILL"); reject(new Error("claude timed out")); }, timeoutMs);
+    child.stdout.on("data", (d) => { out += String(d); });
+    child.stderr.on("data", (d) => { err += String(d); });
+    child.on("error", (e) => { clearTimeout(timer); reject(e); });
+    child.on("close", (code) => {
+      clearTimeout(timer);
+      if (code !== 0) return reject(new Error(`claude exit ${code}: ${err.slice(0, 200)}`));
+      try { resolve(String((JSON.parse(out) as { result?: unknown }).result ?? "")); }
+      catch { resolve(out); }
+    });
+  });
+}
diff --git a/src/skillify/deficiency-detector.ts b/src/skillify/deficiency-detector.ts
new file mode 100644
index 00000000..73b77514
--- /dev/null
+++ b/src/skillify/deficiency-detector.ts
@@ -0,0 +1,118 @@
+/**
+ * Deficiency detector — the core of the engine's "which skills are bad" step.
+ *
+ * For each org-skill invocation: window the transcript around it, run the FREE
+ * level-1 anchor (user pushback?), and only if anchored spend a level-2 judge
+ * call (was the task actually accomplished?). A "confirmed failure" requires BOTH
+ * — high precision, so we never churn a good skill. Aggregate per skill: a skill
+ * is deficient if it has enough invocations AND a high confirmed-failure rate.
+ *
+ * Token discipline: the judge runs ONLY on anchored windows (a fraction), on a
+ * windowed slice (not whole sessions). Everything injectable (query + judge model)
+ * so the whole orchestration is unit-tested with zero live Deeplake / LLM.
+ *
+ * The ≥5 fire gate lives with the caller (worker): we just return deficientCount.
+ */
+import {
+  listSkillInvocations, windowedTurns, elide, type QueryFn, type SkillInvocation,
+} from "./skill-invocations.js";
+import { detectAnchor } from "./session-anchor.js";
+import { judgeSuccess, type ModelCall } from "./success-judge.js";
+
+export interface SkillDeficiency {
+  name: string;
+  author: string;
+  invocations: number;        // org-skill invocations examined
+  anchored: number;           // had a level-1 anchor → judged
+  confirmedFailures: number;  // anchor AND judge said success=0
+  failureRate: number;        // confirmedFailures / invocations
+  deficient: boolean;         // failureRate >= threshold AND invocations >= minInvocations
+  examples: string[];         // a few failure reasons (for the proposer)
+}
+
+export interface DetectorConfig {
+  minInvocations?: number;       // min-n per skill before we trust the rate (default 8)
+  failureRateThreshold?: number; // confirmed-failure rate to flag deficient (default 0.4)
+  window?: { before?: number; after?: number; maxChars?: number };
+  judge?: ModelCall;             // injected; default = real claude judge
+  sinceIso?: string;             // lookback bound
+  limit?: number;                // cap invocation rows pulled
+}
+
+const skillKey = (name: string, author: string) => `${name}--${author}`;
+
+export interface ScoreConfig {
+  window?: { before?: number; after?: number; maxChars?: number };
+  judge?: ModelCall;
+}
+
+/**
+ * Score a set of invocations: window each, run the free anchor, and judge ONLY the
+ * anchored ones. Shared by the detector (per-skill deficiency) and the edit gate
+ * (a skill's failure rate in a time window).
+ */
+export async function scoreInvocations(
+  query: QueryFn,
+  sessionsTable: string,
+  invocations: SkillInvocation[],
+  cfg: ScoreConfig = {},
+): Promise<{ anchored: number; confirmed: number; examples: string[] }> {
+  let anchored = 0;
+  let confirmed = 0;
+  const examples: string[] = [];
+  for (const inv of invocations) {
+    const { turns, pivot } = await windowedTurns(query, sessionsTable, inv, cfg.window);
+    const anchor = detectAnchor(turns, pivot); // anchor only on post-invocation reaction
+    if (!anchor.anchored) continue; // free filter — no judge call
+    anchored++;
+    const window = elide(turns.map((t) => `${t.role}: ${t.text}`).join("\n\n"), cfg.window?.maxChars ?? 4000);
+    const verdict = await judgeSuccess(window, { model: cfg.judge });
+    if (verdict.success === 0) {
+      confirmed++;
+      if (examples.length < 3) examples.push(verdict.reason || anchor.evidence);
+    }
+  }
+  return { anchored, confirmed, examples };
+}
+
+export interface DetectionResult {
+  skills: SkillDeficiency[];
+  deficientCount: number;
+}
+
+export async function detectDeficientSkills(
+  query: QueryFn,
+  sessionsTable: string,
+  cfg: DetectorConfig = {},
+): Promise<DetectionResult> {
+  const minInvocations = cfg.minInvocations ?? 8;
+  const threshold = cfg.failureRateThreshold ?? 0.4;
+
+  const invocations = await listSkillInvocations(query, sessionsTable, { sinceIso: cfg.sinceIso, limit: cfg.limit });
+
+  const groups = new Map<string, SkillInvocation[]>();
+  for (const inv of invocations) {
+    const k = skillKey(inv.name, inv.author);
+    const arr = groups.get(k);
+    if (arr) arr.push(inv); else groups.set(k, [inv]);
+  }
+
+  const skills: SkillDeficiency[] = [];
+  for (const list of groups.values()) {
+    const { anchored, confirmed, examples } = await scoreInvocations(query, sessionsTable, list, cfg);
+    const failureRate = list.length ? confirmed / list.length : 0;
+    skills.push({
+      name: list[0].name,
+      author: list[0].author,
+      invocations: list.length,
+      anchored,
+      confirmedFailures: confirmed,
+      failureRate,
+      deficient: list.length >= minInvocations && failureRate >= threshold,
+      examples,
+    });
+  }
+
+  skills.sort((a, b) => b.failureRate - a.failureRate || b.invocations - a.invocations);
+  return { skills, deficientCount: skills.filter((s) => s.deficient).length };
+}
diff --git a/src/skillify/session-anchor.ts b/src/skillify/session-anchor.ts
new file mode 100644
index 00000000..83056d8d
--- /dev/null
+++ b/src/skillify/session-anchor.ts
@@ -0,0 +1,50 @@
+/**
+ * Heuristic "anchor" — a HARD, observable signal in the transcript that a session
+ * went badly, independent of any LLM judgment: the user pushed back on / corrected
+ * what the assistant just did. Pure + free (no LLM, no I/O).
+ *
+ * It's the level-1 filter in the outcome pipeline: only windows with an anchor go
+ * to the (paid) success-judge, and a session is labelled a failure only when the
+ * anchor AND the judge agree. So this is deliberately tuned for RECALL over
+ * precision — a false positive just costs one judge call (which then drops it),
+ * but a false negative under-detects (conservative — it never churns a good skill).
+ * Patterns are meant to be tuned against real sessions; this is a starting set.
+ */
+import type { Turn } from "./skill-invocations.js";
+
+export type AnchorKind = "correction" | "none";
+export interface Anchor {
+  anchored: boolean;
+  kind: AnchorKind;
+  evidence: string; // the user turn that triggered it (truncated)
+}
+
+// Unambiguous correction — ALWAYS an anchor, even amid polite words. This must
+// win over BENIGN so "thanks, but this is still failing" still fires.
+const STRONG = /\b(wrong|incorrect|not what|that'?s not|does ?n'?t work|did ?n'?t work|do ?n'?t work|wo ?n'?t work|is ?n'?t|broke|broken|still (failing|broken|not working|wrong|the same)|try again|undo|revert that|that fail|not right)/i;
+
+// Ambiguous negation: "no" is pushback ("no, that's off") but also benign
+// ("no problem"), so it only anchors when the turn isn't a clear benign phrase.
+const AMBIGUOUS = /\b(no|nope)\b/i;
+const BENIGN = /\b(no (problem|worries|need|biggie)|no,? thanks|all good|works? (now|great|fine|perfectly)|that works|perfect|looks good)\b/i;
+
+/**
+ * Detect a correction anchor in a windowed slice of turns. A pushback is a USER turn
+ * reacting to an ASSISTANT turn — and BOTH must be POST-invocation (index ≥ fromIndex),
+ * so a correction that happened BEFORE the skill ran (e.g. the skill was a repair
+ * attempt) isn't misattributed to this skill. fromIndex defaults to 0 (scan all).
+ * Recall-oriented: a strong correction fires regardless of polite framing; only the
+ * bare "no" is benign-gated.
+ */
+export function detectAnchor(turns: Turn[], fromIndex = 0): Anchor {
+  for (let i = Math.max(1, fromIndex); i < turns.length; i++) {
+    const t = turns[i];
+    if (t.role !== "USER" || turns[i - 1].role !== "ASSISTANT") continue;
+    if (i - 1 < fromIndex) continue; // the assistant being reacted to must be post-invocation
+    const anchored = STRONG.test(t.text) || (AMBIGUOUS.test(t.text) && !BENIGN.test(t.text));
+    if (anchored) {
+      return { anchored: true, kind: "correction", evidence: t.text.slice(0, 200) };
+    }
+  }
+  return { anchored: false, kind: "none", evidence: "" };
+}
diff --git a/src/skillify/skill-edit-gate.ts b/src/skillify/skill-edit-gate.ts
new file mode 100644
index 00000000..50d01450
--- /dev/null
+++ b/src/skillify/skill-edit-gate.ts
@@ -0,0 +1,89 @@
+/**
+ * Edit-outcome gate — the validation organ (the paper's gate, adapted).
+ *
+ * A randomized A/B is the ideal, but it needs the skill VERSION recorded at
+ * invocation time (a capture change we don't have yet — the Skill tool_use only
+ * carries the skill name). So the feasible gate is LONGITUDINAL: after an edit is
+ * published, compare the skill's confirmed-failure rate in the window AFTER publish
+ * vs BEFORE. A real drop = the edit helped → keep; a real rise = it hurt → revert
+ * (one `cp` from the SKILL.v<old>.bak backup). No clear signal / too few post-publish
+ * uses → inconclusive (wait, or revert when stale).
+ *
+ * It's OBSERVATIONAL (confounded — the population shifts week to week), so it needs
+ * a margin + a minimum sample. Randomized A/B is the clean upgrade once invocation-
+ * version capture lands. Reuses scoreInvocations, so the same anchor+judge that
+ * detects deficiency also validates the fix. Injected query/judge → unit-testable.
+ */
+import { listSkillInvocations, type QueryFn } from "./skill-invocations.js";
+import { scoreInvocations } from "./deficiency-detector.js";
+import type { ModelCall } from "./claude-model.js";
+
+export interface WindowStats {
+  invocations: number;
+  anchored: number;
+  confirmed: number;
+  failureRate: number; // confirmed / invocations
+}
+
+export interface GateDecision {
+  before: WindowStats;
+  after: WindowStats;
+  delta: number; // before.failureRate - after.failureRate (positive = improved)
+  decision: "keep" | "revert" | "inconclusive";
+}
+
+interface MeasureOpts {
+  sinceIso?: string;
+  untilIso?: string;
+  limit?: number;
+  window?: { before?: number; after?: number; maxChars?: number };
+  judge?: ModelCall;
+}
+
+/** Confirmed-failure rate for one skill over a time window. */
+export async function measureSkillFailureRate(
+  query: QueryFn,
+  sessionsTable: string,
+  name: string,
+  author: string,
+  opts: MeasureOpts = {},
+): Promise<WindowStats> {
+  const all = await listSkillInvocations(query, sessionsTable, { sinceIso: opts.sinceIso, untilIso: opts.untilIso, limit: opts.limit });
+  const mine = all.filter((i) => i.name === name && i.author === author);
+  const { anchored, confirmed } = await scoreInvocations(query, sessionsTable, mine, { window: opts.window, judge: opts.judge });
+  return { invocations: mine.length, anchored, confirmed, failureRate: mine.length ? confirmed / mine.length : 0 };
+}
+
+/** Pure decision from before/after stats. */
+export function gateEditOutcome(
+  before: WindowStats,
+  after: WindowStats,
+  opts: { margin?: number; minAfter?: number } = {},
+): GateDecision {
+  const margin = opts.margin ?? 0.2;
+  const minAfter = opts.minAfter ?? 5;
+  const delta = before.failureRate - after.failureRate;
+  let decision: GateDecision["decision"];
+  if (after.invocations < minAfter) decision = "inconclusive";              // not enough post-publish use
+  else if (delta >= margin) decision = "keep";                              // failure rate dropped → helped
+  else if (after.failureRate - before.failureRate >= margin) decision = "revert"; // got measurably worse
+  else decision = "inconclusive";                                          // no clear signal
+  return { before, after, delta, decision };
+}
+
+/** Full gate: measure before/after a publish timestamp and decide. */
+export async function gateEdit(
+  query: QueryFn,
+  sessionsTable: string,
+  name: string,
+  author: string,
+  publishIso: string,
+  opts: { windowDays?: number; nowIso?: string; margin?: number; minAfter?: number } & MeasureOpts = {},
+): Promise<GateDecision> {
+  const windowDays = opts.windowDays ?? 14;
+  const beforeSince = new Date(Date.parse(publishIso) - windowDays * 24 * 60 * 60 * 1000).toISOString();
+  const shared = { limit: opts.limit, window: opts.window, judge: opts.judge };
+  const before = await measureSkillFailureRate(query, sessionsTable, name, author, { ...shared, sinceIso: beforeSince, untilIso: publishIso });
+  const after = await measureSkillFailureRate(query, sessionsTable, name, author, { ...shared, sinceIso: publishIso, untilIso: opts.nowIso });
+  return gateEditOutcome(before, after, opts);
+}
diff --git a/src/skillify/skill-edits.ts b/src/skillify/skill-edits.ts
new file mode 100644
index 00000000..115c1aa9
--- /dev/null
+++ b/src/skillify/skill-edits.ts
@@ -0,0 +1,109 @@
+/**
+ * Structured, bounded edits over a markdown SKILL.md — the paper's edit operations
+ * (append / insert_after / replace / delete) and "textual learning rate" (edit
+ * budget). Port of SkillOpt's skillopt/optimizer/skill.py.
+ *
+ * A protected region — between <!-- SLOW_UPDATE_START --> and <!-- SLOW_UPDATE_END -->
+ * — holds longitudinal guidance that fast per-edit changes must NOT touch (the
+ * paper's slow-update). Edits targeting it are skipped, and `append` lands above it.
+ *
+ * Pure + deterministic — no I/O, fully unit-testable.
+ */
+export type EditOp = "append" | "insert_after" | "replace" | "delete";
+export interface Edit {
+  op: EditOp;
+  target?: string;  // anchor text for insert_after / replace / delete
+  content?: string; // new text for append / insert_after / replace
+}
+
+export const SU_START = "<!-- SLOW_UPDATE_START -->";
+export const SU_END = "<!-- SLOW_UPDATE_END -->";
+
+function protectedRange(skill: string): [number, number] | null {
+  const a = skill.indexOf(SU_START);
+  const b = skill.indexOf(SU_END);
+  if (a === -1 || b === -1 || b < a) return null;
+  return [a, b + SU_END.length];
+}
+
+function targetsProtected(skill: string, target: string): boolean {
+  const r = protectedRange(skill);
+  if (!r || !target) return false;
+  const idx = skill.indexOf(target);
+  if (idx === -1) return false;
+  // Reject if the target RANGE [idx, idx+len) overlaps the protected range at all —
+  // not just if it starts inside it (a target that begins just before SLOW_UPDATE_START
+  // and spans into the block must not be allowed to delete protected guidance).
+  return idx < r[1] && idx + target.length > r[0];
+}
+
+/** Enforce the edit budget ("textual learning rate"): keep at most `budget` edits. */
+export function selectEdits(edits: Edit[], budget: number): Edit[] {
+  return edits.slice(0, Math.max(0, budget));
+}
+
+export interface ApplyResult {
+  skill: string;
+  report: string[];
+  applied: number; // how many edits actually changed the doc
+}
+
+/** Apply bounded structured edits; protected-region targets are skipped. */
+export function applyEdits(skill: string, edits: Edit[]): ApplyResult {
+  let s = skill;
+  const report: string[] = [];
+  let applied = 0;
+  const ok = (msg: string) => { applied++; report.push(`OK ${msg}`); };
+
+  for (const e of edits) {
+    if (e.target && targetsProtected(s, e.target)) {
+      report.push(`SKIP ${e.op}: targets protected slow-update region`);
+      continue;
+    }
+    switch (e.op) {
+      case "append": {
+        const content = (e.content ?? "").trim();
+        if (!content) { report.push("SKIP append: empty content"); break; }
+        const r = protectedRange(s);
+        if (r) s = s.slice(0, r[0]) + content + "\n\n" + s.slice(r[0]);
+        else s = s.replace(/\s*$/, "") + "\n\n" + content + "\n";
+        ok(`append (+${content.length} chars)`);
+        break;
+      }
+      case "insert_after": {
+        const target = e.target ?? "";
+        const content = (e.content ?? "").trim();
+        if (!target || !content) { report.push("SKIP insert_after: missing target/content"); break; }
+        const idx = s.indexOf(target);
+        if (idx === -1) { report.push("SKIP insert_after: target not found"); break; }
+        const lineEnd = s.indexOf("\n", idx + target.length);
+        const at = lineEnd === -1 ? s.length : lineEnd;
+        s = s.slice(0, at) + "\n" + content + s.slice(at);
+        ok("insert_after");
+        break;
+      }
+      case "replace": {
+        const target = e.target ?? "";
+        const content = e.content ?? "";
+        if (!target) { report.push("SKIP replace: missing target"); break; }
+        const idx = s.indexOf(target);
+        if (idx === -1) { report.push("SKIP replace: target not found"); break; }
+        s = s.slice(0, idx) + content + s.slice(idx + target.length);
+        ok("replace");
+        break;
+      }
+      case "delete": {
+        const target = e.target ?? "";
+        if (!target) { report.push("SKIP delete: missing target"); break; }
+        const idx = s.indexOf(target);
+        if (idx === -1) { report.push("SKIP delete: target not found"); break; }
+        s = s.slice(0, idx) + s.slice(idx + target.length);
+        ok("delete");
+        break;
+      }
+      default:
+        report.push(`SKIP unknown op: ${(e as Edit).op}`);
+    }
+  }
+  return { skill: s, report, applied };
+}
diff --git a/src/skillify/skill-invocations.ts b/src/skillify/skill-invocations.ts
new file mode 100644
index 00000000..ae9ed9b7
--- /dev/null
+++ b/src/skillify/skill-invocations.ts
@@ -0,0 +1,192 @@
+/**
+ * Read side of skill *invocation* attribution — the basis for deficiency detection.
+ *
+ * A skill can only help or hurt if the agent actually INVOKED it. Claude Code
+ * records each invocation as a `Skill` tool_use, which capture.ts persists as a
+ * tool_call row: `message.tool_name === "Skill"`, `message.tool_input` a JSON
+ * string `{ skill: "<name>--<author>", args? }`. We key on these real invocations
+ * rather than availability (the dropped skills_active) because:
+ *   - it's accurate — availability-without-invocation is pure noise, and
+ *   - it pins the exact turn, so we can window the judge tightly around it.
+ *
+ * Org skills only: the invoked `skill` is `<name>--<author>`. Plugin-namespaced
+ * (`hivemind:...`) and bare skills are not org-mined skills and are skipped.
+ *
+ * Every query is injected (QueryFn), so this is unit-testable with no live Deeplake.
+ */
+import { sqlStr } from "../utils/sql.js";
+
+export type QueryFn = (sql: string) => Promise<Array<Record<string, unknown>>>;
+
+export interface SkillInvocation {
+  sessionId: string;
+  name: string;
+  author: string;
+  ts: string; // invocation timestamp (message.timestamp, else the row's last_update_date)
+}
+
+interface ParsedMsg {
+  type?: string;
+  tool_name?: string;
+  tool_input?: unknown;
+  content?: unknown;
+  session_id?: unknown;
+  timestamp?: unknown;
+}
+
+function parseMessage(m: unknown): ParsedMsg | null {
+  if (m == null) return null;
+  if (typeof m === "string") {
+    try { return JSON.parse(m) as ParsedMsg; } catch { return null; }
+  }
+  if (typeof m === "object") return m as ParsedMsg;
+  return null;
+}
+
+/** The skill ref invoked by a tool_call message (e.g. "name--author"), else null. */
+export function invokedSkillRef(msg: ParsedMsg): string | null {
+  if (msg.type !== "tool_call" || msg.tool_name !== "Skill") return null;
+  let input: unknown = msg.tool_input;
+  if (typeof input === "string") { try { input = JSON.parse(input); } catch { return null; } }
+  const skill = (input as { skill?: unknown })?.skill;
+  return typeof skill === "string" && skill.length > 0 ? skill : null;
+}
+
+/** Split "<name>--<author>" → parts. null for plugin-namespaced / bare / malformed refs. */
+export function splitOrgSkill(skill: string): { name: string; author: string } | null {
+  if (skill.includes(":")) return null; // plugin-namespaced (e.g. hivemind:hivemind-memory)
+  // name/author are used to build filesystem paths (skills dir, proposals dir), so a
+  // captured tool_input must not smuggle path separators / traversal — same untrusted
+  // treatment the pull path applies to these segments.
+  if (skill.includes("/") || skill.includes("\\") || skill.includes("..")) return null;
+  const i = skill.lastIndexOf("--");
+  if (i <= 0 || i + 2 >= skill.length) return null; // bare or malformed
+  return { name: skill.slice(0, i), author: skill.slice(i + 2) };
+}
+
+/**
+ * Org-skill invocations across captured sessions, newest first. Coarse prefilter
+ * on `"Skill"` (robust to JSONB colon-spacing) then a precise in-code check, so a
+ * stray "Skill" in prose can't slip through as a real invocation.
+ */
+export async function listSkillInvocations(
+  query: QueryFn,
+  sessionsTable: string,
+  opts: { sinceIso?: string; untilIso?: string; limit?: number } = {},
+): Promise<SkillInvocation[]> {
+  const where = [`CAST(message AS TEXT) LIKE '%"Skill"%'`];
+  if (opts.sinceIso) where.push(`last_update_date >= '${sqlStr(opts.sinceIso)}'`);
+  if (opts.untilIso) where.push(`last_update_date < '${sqlStr(opts.untilIso)}'`);
+  const limit = opts.limit && opts.limit > 0 ? ` LIMIT ${Math.floor(opts.limit)}` : "";
+  const rows = await query(
+    `SELECT message, last_update_date FROM "${sessionsTable}" WHERE ${where.join(" AND ")} ORDER BY last_update_date DESC${limit}`,
+  );
+  const out: SkillInvocation[] = [];
+  for (const r of rows) {
+    const m = parseMessage(r.message);
+    if (!m) continue;
+    const ref = invokedSkillRef(m);
+    if (!ref) continue;
+    const parts = splitOrgSkill(ref);
+    if (!parts) continue;
+    const sessionId = typeof m.session_id === "string" ? m.session_id : "";
+    if (!sessionId) continue;
+    out.push({
+      sessionId,
+      name: parts.name,
+      author: parts.author,
+      ts: typeof m.timestamp === "string" ? m.timestamp
+        : (typeof r.last_update_date === "string" ? r.last_update_date : ""),
+    });
+  }
+  return out;
+}
+
+export interface Turn { role: "USER" | "ASSISTANT"; text: string }
+
+/**
+ * Reconstruct the transcript turns of a session, and mark where (between which two
+ * turns) the given invocation happened — so callers can window around it.
+ */
+/** Escape SQL LIKE wildcards (% _ \) so a session id with those chars matches literally. */
+function likeEscape(s: string): string {
+  return s.replace(/([\\%_])/g, "\\$1");
+}
+
+async function sessionTurns(
+  query: QueryFn, sessionsTable: string, inv: SkillInvocation,
+): Promise<{ turns: Turn[]; invIndex: number }> {
+  const sid = sqlStr(likeEscape(inv.sessionId));
+  const rows = await query(
+    `SELECT message FROM "${sessionsTable}" WHERE path LIKE '/sessions/%${sid}%' ESCAPE '\\' ORDER BY creation_date ASC`,
+  );
+  const turns: Turn[] = [];
+  let invIndex = -1;
+  for (const r of rows) {
+    const j = parseMessage(r.message);
+    if (!j) continue;
+    // Exact session match: `path LIKE %sid%` can match a substring/wildcard collision,
+    // so drop any row whose recorded session_id isn't this exact session.
+    if (typeof j.session_id === "string" && j.session_id !== inv.sessionId) continue;
+    // The invocation itself is a tool_call (not a turn): mark its position then skip.
+    const ref = invokedSkillRef(j);
+    if (ref) {
+      const p = splitOrgSkill(ref);
+      if (invIndex < 0 && p && p.name === inv.name && p.author === inv.author
+        && (typeof j.timestamp !== "string" || !inv.ts || j.timestamp === inv.ts)) {
+        invIndex = turns.length;
+      }
+      continue;
+    }
+    const text = typeof j.content === "string" ? j.content.trim() : "";
+    if (!text) continue;
+    if (j.type === "user_message") turns.push({ role: "USER", text });
+    else if (j.type === "assistant_message") turns.push({ role: "ASSISTANT", text });
+  }
+  if (invIndex < 0) invIndex = turns.length; // invocation not located → treat as session end
+  return { turns, invIndex };
+}
+
+/**
+ * The transcript window around an invocation: `before` turns before it and `after`
+ * turns after — where the help-or-harm signal lives — head+tail elided to maxChars.
+ * `before`/`after` are tunable; defaults chosen as a small starting point.
+ */
+/** A windowed slice plus `pivot` = the index in `turns` of the first POST-invocation
+ * turn (turns before it are the pre-invocation context — kept for the judge, but the
+ * anchor must not scan them, or a prior correction gets misattributed to this skill). */
+export interface WindowSlice {
+  turns: Turn[];
+  pivot: number;
+}
+
+export async function windowedTurns(
+  query: QueryFn,
+  sessionsTable: string,
+  inv: SkillInvocation,
+  opts: { before?: number; after?: number } = {},
+): Promise<WindowSlice> {
+  const before = opts.before ?? 3;
+  const after = opts.after ?? 6;
+  const { turns, invIndex } = await sessionTurns(query, sessionsTable, inv);
+  const start = Math.max(0, invIndex - before);
+  return { turns: turns.slice(start, invIndex + after), pivot: invIndex - start };
+}
+
+/** Head+tail elide a string to maxChars (so a pasted log/diff can't blow a prompt). */
+export function elide(text: string, maxChars: number): string {
+  if (text.length <= maxChars) return text;
+  const head = text.slice(0, Math.floor(maxChars * 0.55));
+  const tail = text.slice(text.length - Math.floor(maxChars * 0.45));
+  return `${head}\n\n…[${text.length - maxChars} chars elided]…\n\n${tail}`;
+}
+
+export async function windowAroundInvocation(
+  query: QueryFn,
+  sessionsTable: string,
+  inv: SkillInvocation,
+  opts: { before?: number; after?: number; maxChars?: number } = {},
+): Promise<string> {
+  const { turns } = await windowedTurns(query, sessionsTable, inv, opts);
+  return elide(turns.map((t) => `${t.role}: ${t.text}`).join("\n\n"), opts.maxChars ?? 4000);
+}
diff --git a/src/skillify/skill-proposer.ts b/src/skillify/skill-proposer.ts
new file mode 100644
index 00000000..1032b1bd
--- /dev/null
+++ b/src/skillify/skill-proposer.ts
@@ -0,0 +1,91 @@
+/**
+ * Proposer — the engine's "reflect → edit" step (the paper's backward pass).
+ * Given a deficient skill's body + the concrete failures the detector confirmed,
+ * the optimizer diagnoses the single recurring weakness and proposes a SMALL set
+ * of structured edits, bounded by the edit budget ("textual learning rate"). The
+ * protected slow-update region is off-limits. Edits are applied locally to produce
+ * the candidate body — NOTHING is published here (publish is a separate, gated step).
+ *
+ * Runs on the user's agent via an injected ModelCall (default = claude sonnet),
+ * so the reflect logic is unit-testable with zero real LLM calls.
+ */
+import { applyEdits, selectEdits, SU_START, SU_END, type Edit, type EditOp } from "./skill-edits.js";
+import { claudeModel, type ModelCall } from "./claude-model.js";
+
+export interface Proposal {
+  edits: Edit[];       // edits kept after the budget
+  editedBody: string;  // skill body after applying them
+  report: string[];    // per-edit OK/SKIP log
+  changed: boolean;    // did anything actually change?
+}
+
+export interface ProposeConfig {
+  editBudget?: number;     // max edits to keep (default 3)
+  model?: ModelCall;       // injected; default = claude sonnet
+  priorEdits?: string[];   // meta-skill: edits already tried for this skill (don't repeat)
+}
+
+const SYSTEM =
+  "You improve an engineering SKILL document that has been producing repeated, " +
+  "confirmed failures. Diagnose the SINGLE recurring weakness behind the failures " +
+  "and propose a SMALL set of structured edits that fix it. Do NOT rewrite the " +
+  `whole doc, and do NOT touch anything between ${SU_START} and ${SU_END}. Reply ` +
+  'with ONLY a JSON array of edits, each: {"op":"append|insert_after|replace|' +
+  'delete","target":"<exact existing text to anchor on; required for ' +
+  'insert_after/replace/delete>","content":"<new text; required for ' +
+  'append/insert_after/replace>"}. Prefer the smallest change that fixes the weakness.';
+
+function buildUserPrompt(body: string, failures: string[], priorEdits: string[]): string {
+  const cases = failures.slice(0, 8).map((f, i) => `${i + 1}. ${f}`).join("\n");
+  const prior = priorEdits.length
+    ? `\n\nALREADY TRIED for this skill on earlier runs (do NOT repeat these — propose something different, or nothing):\n${priorEdits.slice(0, 12).map((p) => `- ${p}`).join("\n")}`
+    : "";
+  return `CURRENT SKILL:\n${body}\n\nCONFIRMED FAILURES it produced (user pushed back AND a judge confirmed the task was not accomplished):\n${cases}${prior}\n\nPropose the bounded edits. JSON array only.`;
+}
+
+const OPS = new Set<EditOp>(["append", "insert_after", "replace", "delete"]);
+
+/** Tolerant parse of a JSON array of edits (handles ```fences / surrounding prose). */
+export function parseEdits(raw: string): Edit[] {
+  let s = raw.trim();
+  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (fence) s = fence[1].trim();
+  const a = s.indexOf("[");
+  const b = s.lastIndexOf("]");
+  if (a === -1 || b <= a) return [];
+  let arr: unknown;
+  try { arr = JSON.parse(s.slice(a, b + 1)); } catch { return []; }
+  if (!Array.isArray(arr)) return [];
+  const out: Edit[] = [];
+  for (const e of arr) {
+    if (!e || typeof e !== "object") continue;
+    const op = (e as { op?: unknown }).op;
+    if (typeof op !== "string" || !OPS.has(op as EditOp)) continue;
+    const target = (e as { target?: unknown }).target;
+    const content = (e as { content?: unknown }).content;
+    out.push({
+      op: op as EditOp,
+      ...(typeof target === "string" ? { target } : {}),
+      ...(typeof content === "string" ? { content } : {}),
+    });
+  }
+  return out;
+}
+
+export async function proposeSkillEdit(
+  skillBody: string,
+  failures: string[],
+  cfg: ProposeConfig = {},
+): Promise<Proposal> {
+  const budget = cfg.editBudget ?? 3;
+  const model = cfg.model ?? claudeModel("sonnet");
+  let raw: string;
+  try {
+    raw = await model(SYSTEM, buildUserPrompt(skillBody, failures, cfg.priorEdits ?? []));
+  } catch {
+    return { edits: [], editedBody: skillBody, report: ["proposer model call failed"], changed: false };
+  }
+  const edits = selectEdits(parseEdits(raw), budget);
+  const { skill, report, applied } = applyEdits(skillBody, edits);
+  return { edits, editedBody: skill, report, changed: applied > 0 };
+}
diff --git a/src/skillify/skill-publisher.ts b/src/skillify/skill-publisher.ts
new file mode 100644
index 00000000..a647dcb5
--- /dev/null
+++ b/src/skillify/skill-publisher.ts
@@ -0,0 +1,64 @@
+/**
+ * Publish mechanism: write an accepted skill edit to the LIVE SKILL.md via the
+ * native skills dir (the only legitimate channel — never the model's prompt
+ * context; see PR #223). Bumps the frontmatter version (enables v1-vs-v2) and
+ * keeps a backup so a bad edit is one `cp` from revert.
+ *
+ * This is the mechanism only. The worker does NOT call it on an unvalidated edit
+ * (the offline gate isn't trustworthy — see the spike findings); it writes a
+ * review proposal instead, and live publish is reserved for an edit that has
+ * passed the real-usage A/B gate (deferred). Pure fs; testable against a tmp dir.
+ */
+import fs from "node:fs";
+import path from "node:path";
+
+export interface PublishResult {
+  path: string;
+  oldVersion: number;
+  newVersion: number;
+  backupPath: string;
+}
+
+/** Split a SKILL.md into its frontmatter block (incl. fences) and the body. */
+export function splitFrontmatter(md: string): { frontmatter: string; body: string } {
+  const m = md.match(/^(---\n[\s\S]*?\n---\n)([\s\S]*)$/);
+  if (m) return { frontmatter: m[1], body: m[2] };
+  return { frontmatter: "", body: md };
+}
+
+/** Bump `version: N` in a frontmatter block (absent → treat as 1 → 2). */
+export function bumpVersion(frontmatter: string): { frontmatter: string; oldVersion: number; newVersion: number } {
+  const m = frontmatter.match(/^version:\s*(\d+)\s*$/m);
+  const oldVersion = m ? parseInt(m[1], 10) : 1;
+  const newVersion = oldVersion + 1;
+  let next: string;
+  if (m) {
+    next = frontmatter.replace(/^version:\s*\d+\s*$/m, `version: ${newVersion}`); // has a version line
+  } else if (/\n---\n$/.test(frontmatter)) {
+    next = frontmatter.replace(/\n---\n$/, `\nversion: ${newVersion}\n---\n`);     // frontmatter, no version
+  } else {
+    next = `---\nversion: ${newVersion}\n---\n`;                                   // no frontmatter → create one
+  }
+  return { frontmatter: next, oldVersion, newVersion };
+}
+
+/**
+ * Write `editedBody` to the skill's live SKILL.md, version bumped, original backed
+ * up to SKILL.v<old>.bak.md. Throws if the skill dir / file isn't present.
+ */
+export function publishSkillEdit(
+  skillsRoot: string,
+  name: string,
+  author: string,
+  editedBody: string,
+): PublishResult {
+  const dir = path.join(skillsRoot, `${name}--${author}`);
+  const file = path.join(dir, "SKILL.md");
+  const existing = fs.readFileSync(file, "utf8");
+  const { frontmatter } = splitFrontmatter(existing);
+  const { frontmatter: bumped, oldVersion, newVersion } = bumpVersion(frontmatter);
+  const backupPath = path.join(dir, `SKILL.v${oldVersion}.bak.md`);
+  fs.writeFileSync(backupPath, existing);
+  fs.writeFileSync(file, `${bumped}${editedBody.trimEnd()}\n`);
+  return { path: file, oldVersion, newVersion, backupPath };
+}
diff --git a/src/skillify/skillopt-engine.ts b/src/skillify/skillopt-engine.ts
new file mode 100644
index 00000000..4d1a5f8f
--- /dev/null
+++ b/src/skillify/skillopt-engine.ts
@@ -0,0 +1,131 @@
+/**
+ * The weekly SkillOpt cycle, wired end to end and fully injectable:
+ *
+ *   detect deficient skills  →  ≥5 fire gate  →  for each: read body, propose a
+ *   bounded edit, write a REVIEW PROPOSAL (not a live publish).
+ *
+ * Why proposals, not auto-publish: the offline gate isn't trustworthy (spike
+ * finding), so we never auto-overwrite a live skill. The engine surfaces concrete,
+ * evidence-backed edit proposals; turning one live is gated on the real-usage A/B
+ * (deferred) or a human. Everything is injected (query, judge/proposer models, the
+ * skill reader, the proposal writer), so this orchestration is unit-tested with no
+ * Deeplake / LLM / fs.
+ */
+import fs from "node:fs";
+import path from "node:path";
+import { detectDeficientSkills, type DetectorConfig } from "./deficiency-detector.js";
+import { proposeSkillEdit, type ProposeConfig } from "./skill-proposer.js";
+import { splitFrontmatter } from "./skill-publisher.js";
+import type { QueryFn } from "./skill-invocations.js";
+import type { Edit } from "./skill-edits.js";
+import type { PulledManifest } from "./manifest.js";
+
+export interface ProposalRecord {
+  name: string;
+  author: string;
+  invocations: number;
+  confirmedFailures: number;
+  failureRate: number;
+  examples: string[];
+  edits: Edit[];
+  report: string[];
+  candidateBody: string;
+  createdAt: string;
+}
+
+export interface CycleDeps {
+  query: QueryFn;
+  sessionsTable: string;
+  readSkillBody: (name: string, author: string) => string | null; // null when not installed locally
+  writeProposal: (rec: ProposalRecord) => void;
+  detector?: DetectorConfig;
+  proposer?: ProposeConfig;
+  fireThreshold?: number; // deficient-skill count to fire (default 5)
+  maxProposals?: number;  // cap edits proposed per cycle (default 10)
+  now: string;            // ISO timestamp (injected — Date is awkward in workers)
+  meta?: {                // optimizer cross-run memory (skillopt-meta); optional
+    prior: (name: string, author: string) => string[];
+    has: (name: string, author: string, edits: Edit[]) => boolean;
+    record: (name: string, author: string, edits: Edit[]) => void;
+  };
+}
+
+export interface CycleResult {
+  deficientCount: number;
+  fired: boolean;
+  proposals: Array<{ name: string; author: string; changed: boolean; failureRate: number }>;
+}
+
+export async function runSkillOptCycle(deps: CycleDeps): Promise<CycleResult> {
+  const fireThreshold = deps.fireThreshold ?? 5;
+  const { skills, deficientCount } = await detectDeficientSkills(deps.query, deps.sessionsTable, deps.detector);
+
+  // The ≥N gate: only act on a real PATTERN of deficiency, not one or two noisy skills.
+  if (deficientCount < fireThreshold) {
+    return { deficientCount, fired: false, proposals: [] };
+  }
+
+  const targets = skills.filter((s) => s.deficient).slice(0, deps.maxProposals ?? 10);
+  const proposals: CycleResult["proposals"] = [];
+  for (const s of targets) {
+    const body = deps.readSkillBody(s.name, s.author);
+    if (!body) continue; // not installed locally → nothing to edit
+    const priorEdits = deps.meta?.prior(s.name, s.author) ?? [];
+    const p = await proposeSkillEdit(body, s.examples, { ...deps.proposer, priorEdits });
+    // dedup against the meta memory — don't re-write an edit already tried for this skill.
+    const isDup = p.changed && (deps.meta?.has(s.name, s.author, p.edits) ?? false);
+    if (p.changed && !isDup) {
+      deps.writeProposal({
+        name: s.name, author: s.author,
+        invocations: s.invocations, confirmedFailures: s.confirmedFailures, failureRate: s.failureRate,
+        examples: s.examples, edits: p.edits, report: p.report,
+        candidateBody: p.editedBody, createdAt: deps.now,
+      });
+      deps.meta?.record(s.name, s.author, p.edits);
+    }
+    proposals.push({ name: s.name, author: s.author, changed: p.changed && !isDup, failureRate: s.failureRate });
+  }
+  return { deficientCount, fired: true, proposals };
+}
+
+/** Default proposal writer: <proposalsRoot>/<name>--<author>/{proposal.json,candidate.md}. */
+export function writeProposalToDisk(proposalsRoot: string, rec: ProposalRecord): string {
+  const dir = path.join(proposalsRoot, `${rec.name}--${rec.author}`);
+  fs.mkdirSync(dir, { recursive: true });
+  fs.writeFileSync(path.join(dir, "candidate.md"), rec.candidateBody.trimEnd() + "\n");
+  fs.writeFileSync(path.join(dir, "proposal.json"), JSON.stringify(rec, null, 2) + "\n");
+  return dir;
+}
+
+/** Read a skill's SKILL.md body (frontmatter stripped) from a skills root; null if absent. */
+export function readSkillBodyFromDisk(skillsRoot: string, name: string, author: string): string | null {
+  try {
+    const md = fs.readFileSync(path.join(skillsRoot, `${name}--${author}`, "SKILL.md"), "utf8");
+    return splitFrontmatter(md).body.trim();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Resolve a skill's body from its ACTUAL install location via the pull manifest,
+ * trying every recorded installRoot, then a fallback root. Authoritative — handles
+ * skills pulled with `--to project` into any cwd (invocations come from all
+ * projects, so the worker can't assume its own cwd), and avoids editing a
+ * same-named skill that happens to sit in the current cwd.
+ */
+export function readSkillBodyViaManifest(
+  name: string,
+  author: string,
+  manifest: PulledManifest,
+  fallbackRoot?: string,
+): string | null {
+  const dirName = `${name}--${author}`;
+  const roots = manifest.entries.filter((e) => e.dirName === dirName).map((e) => e.installRoot);
+  if (fallbackRoot) roots.push(fallbackRoot);
+  for (const root of roots) {
+    const body = readSkillBodyFromDisk(root, name, author);
+    if (body) return body;
+  }
+  return null;
+}
diff --git a/src/skillify/skillopt-meta.ts b/src/skillify/skillopt-meta.ts
new file mode 100644
index 00000000..f86473f6
--- /dev/null
+++ b/src/skillify/skillopt-meta.ts
@@ -0,0 +1,84 @@
+/**
+ * Meta-skill — the optimizer's cross-run memory (the paper's meta-skill). Records
+ * every edit proposed for a skill so later runs (a) don't re-propose an edit that
+ * was already tried, and (b) feed "what's been tried" to the proposer. When the A/B
+ * gate lands, the recorded `status` (proposed → applied/reverted) closes the loop so
+ * the optimizer learns which kinds of edits actually help a given skill.
+ *
+ * Append-only JSONL at <stateDir>/skillopt/meta.jsonl. Pure helpers + injected path,
+ * so it's unit-tested with a tmp file.
+ */
+import fs from "node:fs";
+import path from "node:path";
+import type { Edit } from "./skill-edits.js";
+
+export type MetaStatus = "proposed" | "applied" | "reverted";
+
+export interface MetaEntry {
+  skill: string;       // "<name>--<author>"
+  ops: string[];       // short per-edit summaries (op + anchor/preview)
+  fingerprint: string; // stable hash of the edits, for dedup
+  proposedAt: string;
+  status: MetaStatus;
+}
+
+export const skillRef = (name: string, author: string) => `${name}--${author}`;
+
+/** Short human summary of one edit. */
+function summarizeEdit(e: Edit): string {
+  const anchor = e.target ? ` @"${e.target.slice(0, 40)}"` : "";
+  const preview = e.content ? `: ${e.content.slice(0, 60).replace(/\s+/g, " ")}` : "";
+  return `${e.op}${anchor}${preview}`;
+}
+
+/** Order-independent fingerprint of an edit set (so the same edits dedup). */
+export function fingerprintEdits(edits: Edit[]): string {
+  return edits
+    .map((e) => `${e.op}|${e.target ?? ""}|${e.content ?? ""}`)
+    .sort()
+    .join("\n");
+}
+
+export function loadMeta(file: string): MetaEntry[] {
+  let raw: string;
+  try { raw = fs.readFileSync(file, "utf8"); } catch { return []; }
+  const out: MetaEntry[] = [];
+  for (const line of raw.split("\n")) {
+    const t = line.trim();
+    if (!t) continue;
+    try {
+      const e = JSON.parse(t) as MetaEntry;
+      if (e && typeof e.skill === "string" && typeof e.fingerprint === "string") out.push(e);
+    } catch { /* skip malformed line */ }
+  }
+  return out;
+}
+
+export function appendMeta(file: string, entry: MetaEntry): void {
+  fs.mkdirSync(path.dirname(file), { recursive: true });
+  fs.appendFileSync(file, JSON.stringify(entry) + "\n");
+}
+
+/** Has this exact edit set already been proposed for this skill? (avoid churn) */
+export function alreadyProposed(meta: MetaEntry[], name: string, author: string, edits: Edit[]): boolean {
+  const ref = skillRef(name, author);
+  const fp = fingerprintEdits(edits);
+  return meta.some((m) => m.skill === ref && m.fingerprint === fp);
+}
+
+/** Summaries of edits previously tried for this skill — context for the proposer. */
+export function priorEditSummaries(meta: MetaEntry[], name: string, author: string): string[] {
+  const ref = skillRef(name, author);
+  return meta.filter((m) => m.skill === ref).flatMap((m) => m.ops);
+}
+
+/** Build a meta entry for a freshly-proposed edit set. */
+export function metaEntryFor(name: string, author: string, edits: Edit[], now: string): MetaEntry {
+  return {
+    skill: skillRef(name, author),
+    ops: edits.map(summarizeEdit),
+    fingerprint: fingerprintEdits(edits),
+    proposedAt: now,
+    status: "proposed",
+  };
+}
diff --git a/src/skillify/skillopt-trigger.ts b/src/skillify/skillopt-trigger.ts
index f96a757a..cc9e7d9e 100644
--- a/src/skillify/skillopt-trigger.ts
+++ b/src/skillify/skillopt-trigger.ts
@@ -16,9 +16,19 @@ import { fileURLToPath } from "node:url";
 import { log as _log } from "../utils/debug.js";
 import { getStateDir } from "./state-dir.js";
 import { tryAcquireWorkerLock, releaseWorkerLock } from "./state.js";
+import { loadConfig } from "../config.js";
 
 const log = (m: string) => _log("skillopt-trigger", m);
 
+/**
+ * Fire-gate: the worker queries Deeplake via loadConfig(), which accepts both the
+ * credentials file AND env creds (HIVEMIND_TOKEN/HIVEMIND_ORG_ID). Use the SAME check
+ * here so we neither skip env-cred users forever nor stamp on a malformed token-only file.
+ */
+function defaultHasCreds(): boolean {
+  try { return Boolean(loadConfig()?.token); } catch { return false; }
+}
+
 export const WEEK_MS = 7 * 24 * 60 * 60 * 1000;
 /** Cross-process lock key arbitrating the weekly fire (see runWeeklySkillOpt). */
 const LOCK_KEY = "skillopt-weekly";
@@ -81,11 +91,12 @@ export interface FireDeps {
   tryLock?: () => boolean;            // cross-process arbiter; default: real worker lock
   releaseLock?: () => void;          // default: release the real worker lock
   reload?: () => SkillOptState;      // fresh state re-read INSIDE the lock; default: loadState
+  canFire?: () => boolean;           // gate before stamping; default: creds present
 }
 
 export interface FireResult {
   fired: boolean;
-  reason?: "disabled" | "in-worker" | "throttled" | "locked" | "spawned";
+  reason?: "disabled" | "in-worker" | "throttled" | "locked" | "no-creds" | "spawned";
 }
 
 /**
@@ -102,6 +113,10 @@ export function runWeeklySkillOpt(deps: FireDeps = {}): FireResult {
   // Cheap pre-lock check: skip the lock entirely when clearly throttled.
   if (!shouldFire(state.lastRun, now)) return { fired: false, reason: "throttled" };
 
+  // Don't burn the weekly throttle when logged out — stamping lastRun before the
+  // worker confirms creds would skip a user who logs in shortly after until next week.
+  if (!(deps.canFire ?? defaultHasCreds)()) return { fired: false, reason: "no-creds" };
+
   // Cross-process arbiter: two SessionStart hooks racing at the weekly boundary
   // could both pass the throttle and spawn duplicate workers (doubling user-side
   // cost once the worker does real LLM work). An atomic openSync(wx) worker-lock
diff --git a/src/skillify/skillopt-worker.ts b/src/skillify/skillopt-worker.ts
index 77d815bc..d9f1ecd7 100644
--- a/src/skillify/skillopt-worker.ts
+++ b/src/skillify/skillopt-worker.ts
@@ -1,32 +1,74 @@
 #!/usr/bin/env node
 /**
- * Detached weekly SkillOpt worker (spawned by skillopt-trigger). Runs the loop ONCE:
- *   1. detect a deficient skill (behavioral: sessions that loaded it still scored low)
- *   2. optimizer proposes a bounded edit (v2)
- *   3. real-rollout gate: keep v2 only if it measurably beats v1
- *   4. silent canary publish + post-publish monitor / auto-revert
+ * Detached weekly SkillOpt worker (spawned by skillopt-trigger). Runs the cycle ONCE:
+ *   1. detect deficient skills from real invocations (anchor + judge, windowed)
+ *   2. ≥5 fire gate (act on a pattern, not noise)
+ *   3. propose a bounded edit per deficient skill and write a REVIEW PROPOSAL
  *
- * Uses the user's own agent (claude -p / codex), so no org API key. Runs in the background; the
- * user never notices. HIVEMIND_SKILLOPT_WORKER=1 is set by the trigger as a recursion guard.
- *
- * STATUS: scaffold. Steps 1/3/4 depend on prerequisites not yet shipped (deployed attribution data
- * for detection + monitoring, and a local rollout sandbox). The loop ENGINE (rollout->optimize->gate)
- * is prototyped in experiments/skillopt-spike (skillopt-loop.ts, validated both directions). This
- * entry exists so the trigger has a real, spawnable target and the wiring is testable end to end.
+ * It does NOT auto-publish: the offline gate isn't trustworthy (spike finding), so
+ * live publish is reserved for the real-usage A/B (deferred). Runs on the user's own
+ * agent (claude -p) — no org key, cost lands on the user — in the background, weekly.
+ * HIVEMIND_SKILLOPT_WORKER=1 is set by the trigger as a recursion guard.
  */
+import os from "node:os";
+import path from "node:path";
 import { log as _log } from "../utils/debug.js";
+import { loadConfig } from "../config.js";
+import { DeeplakeApi } from "../deeplake-api.js";
+import { getStateDir } from "./state-dir.js";
+import { runSkillOptCycle, writeProposalToDisk, readSkillBodyViaManifest } from "./skillopt-engine.js";
+import { loadMeta, appendMeta, priorEditSummaries, alreadyProposed, metaEntryFor } from "./skillopt-meta.js";
+import { loadManifest } from "./manifest.js";
 
 const log = (m: string) => _log("skillopt-worker", m);
 
 async function main(): Promise<void> {
   log("skillopt worker started (detached, weekly)");
-  // TODO(skillopt): wire the validated loop engine here once prerequisites land:
-  //   const skill = await detectDeficientSkill();        // needs deployed attribution + satisfaction
-  //   if (!skill) { log("no deficient skill found"); return; }
-  //   const v2 = await optimize(skill);                   // optimizer proposes a bounded edit
-  //   const gain = await gateViaRealRollout(skill, v2);   // keep only if v2 beats v1 (validated)
-  //   if (gain > THRESHOLD) await canaryPublish(skill, v2); // silent; monitor + auto-revert
-  log("skillopt worker: loop body not yet enabled (prerequisites pending) — exiting cleanly");
+  const config = loadConfig();
+  if (!config?.token) { log("no config/credentials — exiting"); return; }
+
+  const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName);
+  const query = (sql: string) => api.query(sql) as Promise<Array<Record<string, unknown>>>;
+  // Resolve skill bodies via the pull manifest's recorded installRoot (authoritative)
+  // — invocations come from ALL projects, so we can't assume the worker's own cwd.
+  // The global ~/.claude/skills is a fallback for skills not in the manifest.
+  const manifest = loadManifest();
+  const skillsRoot = path.join(os.homedir(), ".claude", "skills");
+  const proposalsRoot = path.join(getStateDir(), "skillopt", "proposals");
+  const metaFile = path.join(getStateDir(), "skillopt", "meta.jsonl");
+  const metaCache = loadMeta(metaFile);
+  // Lookback + thresholds are env-tunable (defaults: 30-day window, the detector's
+  // own min-n, and a ≥5-deficient fire gate). A positive override wins; anything
+  // non-numeric/≤0 falls back to the default.
+  const envNum = (k: string): number | undefined => { const n = Number(process.env[k]); return Number.isFinite(n) && n > 0 ? n : undefined; };
+  const lookbackDays = envNum("HIVEMIND_SKILLOPT_LOOKBACK_DAYS") ?? 30;
+  const sinceIso = new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString();
+
+  const res = await runSkillOptCycle({
+    query,
+    sessionsTable: config.sessionsTableName,
+    readSkillBody: (name, author) => readSkillBodyViaManifest(name, author, manifest, skillsRoot),
+    writeProposal: (rec) => writeProposalToDisk(proposalsRoot, rec),
+    meta: {
+      prior: (n, a) => priorEditSummaries(metaCache, n, a),
+      has: (n, a, edits) => alreadyProposed(metaCache, n, a, edits),
+      record: (n, a, edits) => { const e = metaEntryFor(n, a, edits, new Date().toISOString()); appendMeta(metaFile, e); metaCache.push(e); },
+    },
+    detector: {
+      sinceIso, limit: 5000,
+      minInvocations: envNum("HIVEMIND_SKILLOPT_MIN_INVOCATIONS"),
+      failureRateThreshold: envNum("HIVEMIND_SKILLOPT_FAILURE_RATE"),
+    },
+    fireThreshold: envNum("HIVEMIND_SKILLOPT_FIRE_THRESHOLD"),
+    now: new Date().toISOString(),
+  });
+
+  if (!res.fired) {
+    log(`skillopt: ${res.deficientCount} deficient skill(s) — below the fire gate, no action`);
+  } else {
+    const changed = res.proposals.filter((p) => p.changed).length;
+    log(`skillopt: fired — ${res.deficientCount} deficient, ${changed} edit proposal(s) written to ${proposalsRoot}`);
+  }
 }
 
 main().catch((e) => { log(`fatal (swallowed): ${(e as Error)?.message ?? e}`); process.exit(0); });
diff --git a/src/skillify/skills-active.ts b/src/skillify/skills-active.ts
deleted file mode 100644
index d9b3c46d..00000000
Binary files a/src/skillify/skills-active.ts and /dev/null differ
diff --git a/src/skillify/success-judge.ts b/src/skillify/success-judge.ts
new file mode 100644
index 00000000..c2d65626
--- /dev/null
+++ b/src/skillify/success-judge.ts
@@ -0,0 +1,68 @@
+/**
+ * Success-judge — reward level 2. Given a windowed transcript slice, asks the
+ * model the ONE question that resists sycophancy: was the user's task actually
+ * accomplished CORRECTLY? (Ignore whether the user seemed happy — a praised-but-
+ * wrong answer is a failure.) Returns success 0|1 + confidence + reason.
+ *
+ * Runs on the USER's own agent (claude -p) — cost lands on the user, so the
+ * default model is cheap and this is only ever called on anchor-flagged windows
+ * (level 1), never on every session.
+ *
+ * The model call is injected (ModelCall) so the judging logic is unit-tested with
+ * zero real LLM calls; the default shells out to `claude -p`, all tools denied.
+ *
+ * Conservative on failure: an unparseable/errored/empty judgment returns
+ * success=1 (do NOT count as a failure), so a flaky judge can never manufacture
+ * deficiency — it can only fail to detect (which the next run catches).
+ */
+import { claudeModel, type ModelCall } from "./claude-model.js";
+
+export type { ModelCall };
+
+export interface SuccessVerdict {
+  success: 0 | 1;
+  confidence: number; // 0..1
+  reason: string;
+}
+
+const SYSTEM =
+  "You are a strict engineering reviewer. Judge ONLY whether the user's task was " +
+  "actually accomplished CORRECTLY in this session slice. Ignore whether the user " +
+  "seemed happy or polite — a praised-but-wrong answer is a FAILURE. Reply with " +
+  'ONLY a JSON object: {"success": 0 or 1, "confidence": 0.0-1.0, "reason": ' +
+  '"<=200 chars citing concrete evidence"}.';
+
+function buildUserPrompt(window: string): string {
+  return `Session slice (USER/ASSISTANT turns around a skill invocation):\n\n${window}\n\n` +
+    "Did the user's task get accomplished correctly? JSON only.";
+}
+
+function extractJson(raw: string): Record<string, unknown> | null {
+  let s = raw.trim();
+  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (fence) s = fence[1].trim();
+  const a = s.indexOf("{");
+  const b = s.lastIndexOf("}");
+  if (a === -1 || b <= a) return null;
+  try { return JSON.parse(s.slice(a, b + 1)) as Record<string, unknown>; } catch { return null; }
+}
+
+/** Parse a model response into a verdict; unparseable → conservative success=1. */
+export function parseVerdict(raw: string): SuccessVerdict {
+  const j = extractJson(raw);
+  if (!j) return { success: 1, confidence: 0, reason: "unparseable judge output" };
+  const fail = j.success === 0 || j.success === "0" || j.success === false;
+  const confidence = typeof j.confidence === "number" ? Math.max(0, Math.min(1, j.confidence)) : 0.5;
+  const reason = typeof j.reason === "string" ? j.reason.slice(0, 240) : "";
+  return { success: fail ? 0 : 1, confidence, reason };
+}
+
+export async function judgeSuccess(window: string, opts: { model?: ModelCall } = {}): Promise<SuccessVerdict> {
+  if (!window.trim()) return { success: 1, confidence: 0, reason: "empty window" };
+  const model = opts.model ?? claudeModel("haiku"); // cheap default; judge only runs on anchored windows
+  try {
+    return parseVerdict(await model(SYSTEM, buildUserPrompt(window)));
+  } catch (e: unknown) {
+    return { success: 1, confidence: 0, reason: `judge failed: ${(e as Error)?.message ?? String(e)}` };
+  }
+}
diff --git a/tests/claude-code/session-start-hook.test.ts b/tests/claude-code/session-start-hook.test.ts
index de5fa30a..c1639878 100644
--- a/tests/claude-code/session-start-hook.test.ts
+++ b/tests/claude-code/session-start-hook.test.ts
@@ -102,7 +102,6 @@ const stdoutSpy = vi.spyOn(process.stdout, "write");
 async function runHook(env: Record<string, string | undefined> = {}): Promise<string | null> {
   delete process.env.HIVEMIND_WIKI_WORKER;
   delete process.env.HIVEMIND_CAPTURE;
-  delete process.env.HIVEMIND_SKILL_ATTRIBUTION;
   for (const [k, v] of Object.entries(env)) {
     if (v === undefined) delete process.env[k];
     else process.env[k] = v;
@@ -235,28 +234,21 @@ describe("session-start hook — placeholder branching", () => {
     expect(ensureTableMock).toHaveBeenCalled();
     expect(ensureSessionsTableMock).toHaveBeenCalledWith("sessions");
     // 1 SELECT (existing-summary check) + 1 INSERT (placeholder)
-    // + 1 INSERT (skills_active attribution) + 2 renderer SELECTs
-    // (listRules + listOpenGoals) = 5 queries.
-    expect(queryMock).toHaveBeenCalledTimes(5);
+    // + 2 renderer SELECTs (listRules + listOpenGoals) = 4 queries.
+    expect(queryMock).toHaveBeenCalledTimes(4);
     expect(queryMock.mock.calls[0][0]).toMatch(/^SELECT path FROM/);
     expect(queryMock.mock.calls[1][0]).toMatch(/^INSERT INTO/);
-    // skills_active attribution row — shape, not just count (asserts the new
-    // write is the attribution INSERT, so a second stray mutation can't sneak in).
-    expect(queryMock.mock.calls[2][0]).toMatch(/^INSERT INTO "sessions"/);
-    expect(queryMock.mock.calls[2][0]).toContain("skills_active");
-    expect(queryMock.mock.calls[3][0]).toMatch(/^SELECT .* FROM "hivemind_rules"/);
-    expect(queryMock.mock.calls[4][0]).toMatch(/^SELECT .* FROM "hivemind_goals"/);
+    expect(queryMock.mock.calls[2][0]).toMatch(/^SELECT .* FROM "hivemind_rules"/);
+    expect(queryMock.mock.calls[3][0]).toMatch(/^SELECT .* FROM "hivemind_goals"/);
     expect(debugLogMock).toHaveBeenCalledWith("placeholder created");
   });
 
   it("skips placeholder INSERT when summary already exists (resumed session)", async () => {
     queryMock.mockResolvedValueOnce([{ path: "/summaries/alice/sid-1.md" }]);
     await runHook();
-    // 1 placeholder SELECT (returns row, no INSERT) + 1 skills_active
-    // attribution INSERT (runs regardless of placeholder branch) + 2 renderer
-    // SELECTs (rules + goals) = 4 queries.
-    expect(queryMock).toHaveBeenCalledTimes(4);
-    expect(queryMock.mock.calls[1][0]).toContain("skills_active");
+    // 1 placeholder SELECT (returns row, no INSERT) + 2 renderer SELECTs
+    // (rules + goals) = 3 queries.
+    expect(queryMock).toHaveBeenCalledTimes(3);
   });
 
   it("non-empty rules block is appended to additionalContext", async () => {
@@ -272,7 +264,6 @@ describe("session-start hook — placeholder branching", () => {
     };
     queryMock.mockResolvedValueOnce([]);     // placeholder SELECT
     queryMock.mockResolvedValueOnce([]);     // placeholder INSERT
-    queryMock.mockResolvedValueOnce([]);     // skills_active attribution INSERT
     queryMock.mockResolvedValueOnce([rule]); // renderer rules
     queryMock.mockResolvedValueOnce([]);     // renderer goals (empty)
     const out = await runHook();
@@ -296,13 +287,12 @@ describe("session-start hook — placeholder branching", () => {
     };
     queryMock.mockResolvedValueOnce([]);     // placeholder SELECT
     queryMock.mockResolvedValueOnce([]);     // placeholder INSERT
-    queryMock.mockResolvedValueOnce([]);     // skills_active attribution INSERT
     queryMock.mockResolvedValueOnce([rule]); // renderer rules
     queryMock.mockResolvedValueOnce([]);     // renderer goals (empty)
     const out = await runHook();
     const parsed = JSON.parse(out!);
     expect(parsed.hookSpecificOutput.additionalContext).toContain("no DROP TABLE on prod");
-    expect(queryMock).toHaveBeenCalledTimes(5);
+    expect(queryMock).toHaveBeenCalledTimes(4);
   });
 
   it("skips the renderer SELECTs when the trusted table list omits rules + goals", async () => {
@@ -310,12 +300,11 @@ describe("session-start hook — placeholder branching", () => {
     // SELECT. Only the placeholder SELECT + INSERT run.
     knownTablesMock.mockResolvedValue([]);
     await runHook();
-    // placeholder SELECT + placeholder INSERT + skills_active attribution
-    // INSERT = 3 (renderer fires no SELECT when no tables are trusted).
-    expect(queryMock).toHaveBeenCalledTimes(3);
+    // placeholder SELECT + placeholder INSERT = 2 (renderer fires no SELECT
+    // when no tables are trusted).
+    expect(queryMock).toHaveBeenCalledTimes(2);
     expect(queryMock.mock.calls[0][0]).toMatch(/^SELECT path FROM/);
     expect(queryMock.mock.calls[1][0]).toMatch(/^INSERT INTO/);
-    expect(queryMock.mock.calls[2][0]).toContain("skills_active");
   });
 
   it("HIVEMIND_CAPTURE=false: no placeholder, no DDL (ensure), but renderer still runs", async () => {
@@ -336,29 +325,6 @@ describe("session-start hook — placeholder branching", () => {
     );
   });
 
-  it("HIVEMIND_SKILL_ATTRIBUTION=0: skips the skills_active write entirely", async () => {
-    await runHook({ HIVEMIND_SKILL_ATTRIBUTION: "0" });
-    // placeholder SELECT + INSERT + 2 renderer SELECTs = 4 (NO attribution row).
-    expect(queryMock).toHaveBeenCalledTimes(4);
-    // negative assertion: the attribution INSERT must not be present at all.
-    for (const call of queryMock.mock.calls) {
-      expect(call[0]).not.toContain("skills_active");
-    }
-  });
-
-  it("swallows a failed skills_active attribution write (never breaks SessionStart)", async () => {
-    // Content-based (not position-based) so it's robust to query ordering: the
-    // attribution INSERT is the only query carrying the skills_active marker.
-    queryMock.mockImplementation((sql: string) =>
-      sql.includes("skills_active") ? Promise.reject(new Error("attr boom")) : Promise.resolve([]),
-    );
-    const out = await runHook();
-    expect(out).toBeTruthy(); // hook still completes and emits context
-    expect(debugLogMock).toHaveBeenCalledWith(
-      expect.stringContaining("skills_active attribution failed (swallowed): attr boom"),
-    );
-  });
-
   it("logs the SkillOpt fired branch when the weekly trigger spawns a worker", async () => {
     runWeeklySkillOptMock.mockReturnValue({ fired: true, reason: "spawned" });
     await runHook();
diff --git a/tests/shared/deficiency-detector.test.ts b/tests/shared/deficiency-detector.test.ts
new file mode 100644
index 00000000..5c9ee877
--- /dev/null
+++ b/tests/shared/deficiency-detector.test.ts
@@ -0,0 +1,90 @@
+import { describe, it, expect, vi } from "vitest";
+import { detectDeficientSkills } from "../../src/skillify/deficiency-detector.js";
+
+const TABLE = "sessions";
+
+const invRow = (skill: string, sid: string) => ({
+  message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), session_id: sid, timestamp: sid },
+  last_update_date: sid,
+});
+const transcript = (skill: string, sid: string, pushback: boolean) => [
+  { message: { type: "user_message", content: "do it" } },
+  { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), timestamp: sid } },
+  { message: { type: "assistant_message", content: "done" } },
+  { message: { type: "user_message", content: pushback ? "no that's wrong, it mocks the client" : "thanks, perfect" } },
+];
+
+function world() {
+  const invs: Array<Record<string, unknown>> = [];
+  const transcripts = new Map<string, Array<Record<string, unknown>>>();
+  const add = (skill: string, sid: string, pushback: boolean) => {
+    invs.push(invRow(skill, sid));
+    transcripts.set(sid, transcript(skill, sid, pushback));
+  };
+  for (let i = 0; i < 10; i++) add("bad--auth", `bad${i}`, i < 5);     // 5/10 pushback → deficient
+  for (let i = 0; i < 10; i++) add("good--auth", `good${i}`, false);   // 0 pushback → healthy
+  for (let i = 0; i < 3; i++) add("sparse--auth", `sparse${i}`, true); // all fail but too few (min-n)
+  return { invs, transcripts };
+}
+
+describe("detectDeficientSkills", () => {
+  it("flags only skills with enough invocations AND a high confirmed-failure rate", async () => {
+    const { invs, transcripts } = world();
+    const judge = vi.fn(async (_s: string, _u: string) => '{"success":0,"confidence":0.9,"reason":"mocks the client"}');
+    const query = vi.fn(async (sql: string) => {
+      if (sql.includes('"Skill"') && sql.includes("ORDER BY last_update_date")) return invs; // the invocation list
+      const m = sql.match(/\/sessions\/%([^%]+)%/);                                           // a window query
+      return m ? (transcripts.get(m[1]) ?? []) : [];
+    });
+
+    const { skills, deficientCount } = await detectDeficientSkills(query, TABLE, { judge });
+    const bad = skills.find((s) => s.name === "bad")!;
+    const good = skills.find((s) => s.name === "good")!;
+    const sparse = skills.find((s) => s.name === "sparse")!;
+
+    expect(bad).toMatchObject({ invocations: 10, anchored: 5, confirmedFailures: 5, deficient: true });
+    expect(bad.failureRate).toBeCloseTo(0.5);
+    expect(good).toMatchObject({ invocations: 10, anchored: 0, confirmedFailures: 0, deficient: false });
+    expect(sparse).toMatchObject({ invocations: 3, confirmedFailures: 3, deficient: false }); // min-n blocks it
+    expect(deficientCount).toBe(1);
+
+    // token discipline: judge runs ONLY on anchored windows (5 bad + 3 sparse = 8), never the 10 good
+    expect(judge).toHaveBeenCalledTimes(8);
+  });
+
+  it("caps the judged window at maxChars (a pasted log can't blow the judge call)", async () => {
+    const huge = "L".repeat(5000);
+    const skill = "bigskill--x", sid = "S1";
+    const transcripts = new Map<string, Array<Record<string, unknown>>>([[sid, [
+      { message: { type: "user_message", content: "do it" } },
+      { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), timestamp: sid } },
+      { message: { type: "assistant_message", content: huge } },                                  // pasted log
+      { message: { type: "user_message", content: "no that's wrong" } },
+    ]]]);
+    let judgedLen = 0;
+    const judge = vi.fn(async (_s: string, user: string) => { judgedLen = user.length; return '{"success":0,"confidence":0.9,"reason":"x"}'; });
+    const query = vi.fn(async (sql: string) => {
+      if (sql.includes('"Skill"') && sql.includes("ORDER BY last_update_date")) return [invRow(skill, sid)];
+      const m = sql.match(/\/sessions\/%([^%]+)%/);
+      return m ? (transcripts.get(m[1]) ?? []) : [];
+    });
+    await detectDeficientSkills(query, TABLE, { judge, minInvocations: 1, window: { maxChars: 300 } });
+    expect(judgedLen).toBeGreaterThan(0);  // judge was called (anchored)
+    expect(judgedLen).toBeLessThan(800);   // capped — not the ~5000-char paste
+  });
+
+  it("respects a custom threshold + min-n", async () => {
+    const { invs, transcripts } = world();
+    const judge = vi.fn(async (_s: string, _u: string) => '{"success":0,"confidence":0.9,"reason":"x"}');
+    const query = vi.fn(async (sql: string) => {
+      if (sql.includes('"Skill"') && sql.includes("ORDER BY last_update_date")) return invs;
+      const m = sql.match(/\/sessions\/%([^%]+)%/);
+      return m ? (transcripts.get(m[1]) ?? []) : [];
+    });
+    // minInvocations 3, threshold 0.9 → only "sparse" (rate 1.0, 3 inv) qualifies; "bad" (0.5) doesn't
+    const { deficientCount, skills } = await detectDeficientSkills(query, TABLE, { judge, minInvocations: 3, failureRateThreshold: 0.9 });
+    expect(skills.find((s) => s.name === "sparse")!.deficient).toBe(true);
+    expect(skills.find((s) => s.name === "bad")!.deficient).toBe(false);
+    expect(deficientCount).toBe(1);
+  });
+});
diff --git a/tests/shared/session-anchor.test.ts b/tests/shared/session-anchor.test.ts
new file mode 100644
index 00000000..942462c7
--- /dev/null
+++ b/tests/shared/session-anchor.test.ts
@@ -0,0 +1,54 @@
+import { describe, it, expect } from "vitest";
+import { detectAnchor } from "../../src/skillify/session-anchor.js";
+import type { Turn } from "../../src/skillify/skill-invocations.js";
+
+const u = (text: string): Turn => ({ role: "USER", text });
+const a = (text: string): Turn => ({ role: "ASSISTANT", text });
+
+describe("detectAnchor", () => {
+  it("fires on user pushback right after an assistant turn", () => {
+    const r = detectAnchor([u("add a smoke test"), a("done, here it is"), u("no that's wrong, it mocks the client")]);
+    expect(r.anchored).toBe(true);
+    expect(r.kind).toBe("correction");
+    expect(r.evidence).toContain("wrong");
+  });
+
+  it("does NOT fire on the opening request (no preceding assistant turn)", () => {
+    const r = detectAnchor([u("this won't work without a flush — add a smoke test")]);
+    expect(r.anchored).toBe(false);
+  });
+
+  it("does NOT fire on a user turn that follows another user turn", () => {
+    const r = detectAnchor([u("first"), u("that didn't work")]); // no assistant in between
+    expect(r.anchored).toBe(false);
+  });
+
+  it("suppresses clear benign negatives (no problem / works now / thanks)", () => {
+    expect(detectAnchor([a("fixed it"), u("no problem, thanks!")]).anchored).toBe(false);
+    expect(detectAnchor([a("try this"), u("works now, perfect")]).anchored).toBe(false);
+  });
+
+  it("catches several real correction phrasings", () => {
+    for (const p of ["that doesn't work", "still failing", "that's incorrect", "try again", "nope", "you broke the build"]) {
+      expect(detectAnchor([a("here"), u(p)]).anchored, p).toBe(true);
+    }
+  });
+
+  it("fires on polite-but-failing corrections (strong pushback overrides benign words)", () => {
+    expect(detectAnchor([a("here"), u("thanks, but this is still failing")]).anchored).toBe(true);
+    expect(detectAnchor([a("done"), u("perfect start, but that's still wrong")]).anchored).toBe(true);
+  });
+
+  it("ignores PRE-invocation pushback (fromIndex) — no misattribution to a repair-attempt skill", () => {
+    // turns: [a, USER pushback (pre-invocation), a (skill output), USER ok] — pivot=2
+    const turns = [a("attempt 1"), u("no that's wrong"), a("retried with the skill"), u("looks good")];
+    expect(detectAnchor(turns, 2).anchored).toBe(false); // the pre-invocation correction is not scanned
+    // a genuine POST-invocation pushback still fires
+    expect(detectAnchor([a("attempt 1"), u("no wrong"), a("fixed"), u("still failing")], 2).anchored).toBe(true);
+  });
+
+  it("returns none when the user is satisfied / silent", () => {
+    expect(detectAnchor([u("do X"), a("done")]).anchored).toBe(false);
+    expect(detectAnchor([]).anchored).toBe(false);
+  });
+});
diff --git a/tests/shared/skill-edit-gate.test.ts b/tests/shared/skill-edit-gate.test.ts
new file mode 100644
index 00000000..4cbb760e
--- /dev/null
+++ b/tests/shared/skill-edit-gate.test.ts
@@ -0,0 +1,58 @@
+import { describe, it, expect, vi } from "vitest";
+import { gateEditOutcome, gateEdit, type WindowStats } from "../../src/skillify/skill-edit-gate.js";
+
+const stats = (invocations: number, failureRate: number): WindowStats =>
+  ({ invocations, anchored: Math.round(invocations * failureRate), confirmed: Math.round(invocations * failureRate), failureRate });
+
+describe("gateEditOutcome", () => {
+  it("KEEP when the failure rate dropped by >= margin", () => {
+    expect(gateEditOutcome(stats(10, 0.6), stats(10, 0.1)).decision).toBe("keep");
+  });
+  it("REVERT when it got measurably worse", () => {
+    expect(gateEditOutcome(stats(10, 0.1), stats(10, 0.5)).decision).toBe("revert");
+  });
+  it("INCONCLUSIVE when there's too little post-publish use", () => {
+    expect(gateEditOutcome(stats(10, 0.6), stats(3, 0.0)).decision).toBe("inconclusive");
+  });
+  it("INCONCLUSIVE when the change is within the margin (noise)", () => {
+    expect(gateEditOutcome(stats(10, 0.30), stats(10, 0.25)).decision).toBe("inconclusive");
+  });
+});
+
+const invRow = (skill: string, sid: string) => ({
+  message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), session_id: sid, timestamp: sid },
+  last_update_date: sid,
+});
+const transcript = (skill: string, sid: string, pushback: boolean) => [
+  { message: { type: "user_message", content: "do it" } },
+  { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), timestamp: sid } },
+  { message: { type: "assistant_message", content: "done (mocked)" } },
+  { message: { type: "user_message", content: pushback ? "no that's wrong, it mocks the client" : "looks good thanks" } },
+];
+
+describe("gateEdit (longitudinal before/after)", () => {
+  it("keeps an edit whose failure rate dropped after publish", async () => {
+    const PUB = "2026-06-05T00:00:00.000Z";
+    const transcripts = new Map<string, Array<Record<string, unknown>>>();
+    const beforeInvs: Array<Record<string, unknown>> = [];
+    const afterInvs: Array<Record<string, unknown>> = [];
+    for (let i = 0; i < 8; i++) {
+      const b = `bef${i}`, a = `aft${i}`;
+      beforeInvs.push(invRow("x--a", b)); transcripts.set(b, transcript("x--a", b, true));   // before: all pushback
+      afterInvs.push(invRow("x--a", a)); transcripts.set(a, transcript("x--a", a, false));    // after: none
+    }
+    const judge = vi.fn(async (_s: string, _u: string) => '{"success":0,"confidence":0.9,"reason":"mocks"}');
+    const query = vi.fn(async (sql: string) => {
+      if (sql.includes('"Skill"') && sql.includes("ORDER BY last_update_date")) {
+        return sql.includes(`< '${PUB}'`) ? beforeInvs : afterInvs; // before window has the untilIso bound
+      }
+      const m = sql.match(/\/sessions\/%([^%]+)%/);
+      return m ? (transcripts.get(m[1]) ?? []) : [];
+    });
+
+    const res = await gateEdit(query, "sessions", "x", "a", PUB, { windowDays: 14, nowIso: "2026-06-12T00:00:00.000Z", judge, minAfter: 5 });
+    expect(res.before.failureRate).toBeCloseTo(1.0);
+    expect(res.after.failureRate).toBeCloseTo(0.0);
+    expect(res.decision).toBe("keep");
+  });
+});
diff --git a/tests/shared/skill-edits.test.ts b/tests/shared/skill-edits.test.ts
new file mode 100644
index 00000000..9143270c
--- /dev/null
+++ b/tests/shared/skill-edits.test.ts
@@ -0,0 +1,64 @@
+import { describe, it, expect } from "vitest";
+import { applyEdits, selectEdits, SU_START, SU_END } from "../../src/skillify/skill-edits.js";
+
+describe("applyEdits", () => {
+  const base = "## Rules\n1. mock the client\n2. skip flush";
+
+  it("append adds content at the end", () => {
+    const r = applyEdits(base, [{ op: "append", content: "3. verify via the API" }]);
+    expect(r.skill).toContain("3. verify via the API");
+    expect(r.applied).toBe(1);
+  });
+
+  it("insert_after inserts on the line after the target", () => {
+    const r = applyEdits(base, [{ op: "insert_after", target: "1. mock the client", content: "(NEVER mock — it hides failures)" }]);
+    expect(r.skill).toMatch(/1\. mock the client\n\(NEVER mock — it hides failures\)\n2\. skip flush/);
+  });
+
+  it("replace swaps the target text", () => {
+    const r = applyEdits(base, [{ op: "replace", target: "skip flush", content: "ALWAYS flush" }]);
+    expect(r.skill).toContain("2. ALWAYS flush");
+    expect(r.skill).not.toContain("skip flush");
+  });
+
+  it("delete removes the target text", () => {
+    const r = applyEdits(base, [{ op: "delete", target: "\n2. skip flush" }]);
+    expect(r.skill).toBe("## Rules\n1. mock the client");
+  });
+
+  it("skips edits whose target isn't found (and counts only applied)", () => {
+    const r = applyEdits(base, [{ op: "replace", target: "nonexistent", content: "x" }, { op: "append", content: "added" }]);
+    expect(r.applied).toBe(1);
+    expect(r.report.some((l) => l.includes("SKIP replace: target not found"))).toBe(true);
+  });
+
+  it("protects the slow-update region: skips edits targeting it, appends ABOVE it", () => {
+    const doc = `## Rules\n1. a\n\n${SU_START}\nLongitudinal: prefer X over Y.\n${SU_END}`;
+    const r = applyEdits(doc, [
+      { op: "delete", target: "prefer X over Y" },      // targets protected → skipped
+      { op: "append", content: "2. b" },                // lands above the region
+    ]);
+    expect(r.skill).toContain("prefer X over Y");        // protected content untouched
+    expect(r.report.some((l) => l.includes("protected slow-update region"))).toBe(true);
+    // appended content sits before the protected block
+    expect(r.skill.indexOf("2. b")).toBeLessThan(r.skill.indexOf(SU_START));
+  });
+
+  it("rejects a target that SPANS INTO the protected region (not just one starting inside)", () => {
+    const doc = `## Rules\n1. a\n${SU_START}\nLongitudinal guidance.\n${SU_END}`;
+    // target begins before SLOW_UPDATE_START but extends into the protected block
+    const r = applyEdits(doc, [{ op: "delete", target: `1. a\n${SU_START}\nLongitudinal` }]);
+    expect(r.applied).toBe(0);
+    expect(r.skill).toContain("Longitudinal guidance.");
+    expect(r.report.some((l) => l.includes("protected slow-update region"))).toBe(true);
+  });
+});
+
+describe("selectEdits (edit budget)", () => {
+  const edits = [1, 2, 3, 4].map((i) => ({ op: "append" as const, content: `${i}` }));
+  it("keeps at most `budget` edits", () => {
+    expect(selectEdits(edits, 2)).toHaveLength(2);
+    expect(selectEdits(edits, 0)).toHaveLength(0);
+    expect(selectEdits(edits, 99)).toHaveLength(4);
+  });
+});
diff --git a/tests/shared/skill-invocations.test.ts b/tests/shared/skill-invocations.test.ts
new file mode 100644
index 00000000..8e8e1bfd
--- /dev/null
+++ b/tests/shared/skill-invocations.test.ts
@@ -0,0 +1,122 @@
+import { describe, it, expect, vi } from "vitest";
+import {
+  invokedSkillRef,
+  splitOrgSkill,
+  listSkillInvocations,
+  windowAroundInvocation,
+  type SkillInvocation,
+} from "../../src/skillify/skill-invocations.js";
+
+const TABLE = "sessions";
+function mockQuery(rows: Array<Record<string, unknown>>) {
+  const calls: string[] = [];
+  return { fn: vi.fn(async (sql: string) => { calls.push(sql); return rows; }), calls };
+}
+const toolCall = (skill: string, sessionId = "S1", ts = "t", asString = false) => {
+  const msg = { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), session_id: sessionId, timestamp: ts };
+  return { message: asString ? JSON.stringify(msg) : msg, last_update_date: ts };
+};
+
+describe("invokedSkillRef", () => {
+  it("returns the skill ref for a Skill tool_call (object or stringified input)", () => {
+    expect(invokedSkillRef({ type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "a--b" }) })).toBe("a--b");
+    expect(invokedSkillRef({ type: "tool_call", tool_name: "Skill", tool_input: { skill: "a--b" } as unknown })).toBe("a--b");
+  });
+  it("returns null for non-Skill tools and non-tool_call messages", () => {
+    expect(invokedSkillRef({ type: "tool_call", tool_name: "Bash", tool_input: "{}" })).toBeNull();
+    expect(invokedSkillRef({ type: "assistant_message", content: "use the Skill tool" })).toBeNull();
+    expect(invokedSkillRef({ type: "tool_call", tool_name: "Skill", tool_input: "not json" })).toBeNull();
+  });
+});
+
+describe("splitOrgSkill", () => {
+  it("splits <name>--<author>, last -- wins", () => {
+    expect(splitOrgSkill("posthog-smoke--kamo.aghbalyan")).toEqual({ name: "posthog-smoke", author: "kamo.aghbalyan" });
+    expect(splitOrgSkill("some-skill--first-last")).toEqual({ name: "some-skill", author: "first-last" });
+  });
+  it("rejects plugin-namespaced, bare, and malformed refs", () => {
+    expect(splitOrgSkill("hivemind:hivemind-memory")).toBeNull(); // plugin
+    expect(splitOrgSkill("update-config")).toBeNull();            // bare
+    expect(splitOrgSkill("baz--")).toBeNull();                    // empty author
+  });
+  it("rejects refs with path separators / traversal (no path escape)", () => {
+    expect(splitOrgSkill("../../etc--x")).toBeNull();
+    expect(splitOrgSkill("ok--..%2f")).toBeNull();   // contains ..
+    expect(splitOrgSkill("a/b--c")).toBeNull();      // separator
+    expect(splitOrgSkill("a--b/c")).toBeNull();      // separator in author
+  });
+});
+
+describe("listSkillInvocations", () => {
+  it("coarse-prefilters on \"Skill\" then keeps only org-skill tool_calls", async () => {
+    const { fn, calls } = mockQuery([
+      toolCall("posthog-smoke--kamo"),                 // org → kept
+      toolCall("hivemind:hivemind-memory"),            // plugin → dropped
+      toolCall("update-config"),                       // bare → dropped
+      { message: { type: "assistant_message", content: "mentions Skill" }, last_update_date: "t" }, // prose → dropped
+      toolCall("pg-debug--sasun", "S2", "t2", true),   // org, stringified message → kept
+    ]);
+    const got = await listSkillInvocations(fn, TABLE, { sinceIso: "2026-06-01", limit: 100 });
+    expect(calls[0]).toContain(`CAST(message AS TEXT) LIKE '%"Skill"%'`);
+    expect(calls[0]).toContain("last_update_date >= '2026-06-01'");
+    expect(calls[0]).toContain("LIMIT 100");
+    expect(got).toEqual([
+      { sessionId: "S1", name: "posthog-smoke", author: "kamo", ts: "t" },
+      { sessionId: "S2", name: "pg-debug", author: "sasun", ts: "t2" },
+    ]);
+  });
+});
+
+describe("windowAroundInvocation", () => {
+  const inv: SkillInvocation = { sessionId: "S1", name: "posthog-smoke", author: "kamo", ts: "t5" };
+  // turns: u1, a1, [skill invoked here], u2(pushback), a2  → window before=1/after=2 ⇒ a1..a2
+  const rows = [
+    { message: { type: "user_message", content: "first" } },
+    { message: { type: "assistant_message", content: "ack" } },
+    { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "posthog-smoke--kamo" }), timestamp: "t5" } },
+    { message: { type: "tool_call", tool_name: "Bash", tool_input: "{}" } }, // non-skill tool → ignored
+    { message: { type: "user_message", content: "no that's wrong" } },
+    { message: { type: "assistant_message", content: "fixing" } },
+  ];
+
+  it("windows `before` turns before and `after` after the invocation", async () => {
+    const { fn, calls } = mockQuery(rows);
+    const out = await windowAroundInvocation(fn, TABLE, inv, { before: 1, after: 2 });
+    expect(calls[0]).toContain("path LIKE '/sessions/%S1%'");
+    // invIndex = 2 (two turns before the skill tool_call). before 1 → from turn 1; after 2 → turns 2,3.
+    expect(out).toBe("ASSISTANT: ack\n\nUSER: no that's wrong\n\nASSISTANT: fixing");
+  });
+
+  it("falls back to session end when the invocation can't be located", async () => {
+    const { fn } = mockQuery([
+      { message: { type: "user_message", content: "hi" } },
+      { message: { type: "assistant_message", content: "bye" } },
+    ]);
+    const out = await windowAroundInvocation(fn, TABLE, inv, { before: 5, after: 5 });
+    expect(out).toBe("USER: hi\n\nASSISTANT: bye"); // whole (short) transcript
+  });
+
+  it("elides a window longer than maxChars", async () => {
+    const big = "x".repeat(400);
+    const { fn } = mockQuery([
+      { message: { type: "user_message", content: big } },
+      { message: { type: "assistant_message", content: big } },
+    ]);
+    const out = await windowAroundInvocation(fn, TABLE, inv, { before: 5, after: 5, maxChars: 150 });
+    expect(out).toContain("chars elided");
+    expect(out.length).toBeLessThan(300);
+  });
+
+  it("drops rows from a different session + escapes LIKE wildcards (exact match)", async () => {
+    const { fn, calls } = mockQuery([
+      { message: { type: "user_message", content: "first", session_id: "S1" } },
+      { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "posthog-smoke--kamo" }), session_id: "S1", timestamp: "t5" } },
+      { message: { type: "assistant_message", content: "did X", session_id: "S1" } },
+      { message: { type: "user_message", content: "LEAK from other session", session_id: "S2" } }, // collision → dropped
+    ]);
+    const out = await windowAroundInvocation(fn, TABLE, inv, { before: 5, after: 5 });
+    expect(calls[0]).toContain("ESCAPE '\\'");
+    expect(out).toContain("did X");
+    expect(out).not.toContain("LEAK from other session");
+  });
+});
diff --git a/tests/shared/skill-proposer.test.ts b/tests/shared/skill-proposer.test.ts
new file mode 100644
index 00000000..cb1a1c00
--- /dev/null
+++ b/tests/shared/skill-proposer.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect, vi } from "vitest";
+import { parseEdits, proposeSkillEdit } from "../../src/skillify/skill-proposer.js";
+
+describe("parseEdits", () => {
+  it("parses a JSON array, tolerating fences/prose, dropping invalid ops + non-objects", () => {
+    const raw = "Sure:\n```json\n[" +
+      '{"op":"replace","target":"mock the client","content":"NEVER mock"},' +
+      '{"op":"bogus","target":"x"},' +              // invalid op → dropped
+      '"nope",' +                                    // non-object → dropped
+      '{"op":"append","content":"verify via API"}' +
+      "]\n```";
+    const edits = parseEdits(raw);
+    expect(edits).toEqual([
+      { op: "replace", target: "mock the client", content: "NEVER mock" },
+      { op: "append", content: "verify via API" },
+    ]);
+  });
+  it("returns [] when there's no array", () => {
+    expect(parseEdits("the model refused")).toEqual([]);
+  });
+});
+
+describe("proposeSkillEdit", () => {
+  const body = "## Rules\n1. mock the client\n2. skip flush";
+  const failures = ["mocked the client so the test passes even when the event never sends"];
+
+  it("applies the proposed edits to produce a candidate body", async () => {
+    const model = vi.fn(async (_s: string, _u: string) =>
+      '[{"op":"replace","target":"mock the client","content":"NEVER mock — assert on the real client"}]');
+    const p = await proposeSkillEdit(body, failures, { model });
+    expect(p.changed).toBe(true);
+    expect(p.editedBody).toContain("NEVER mock — assert on the real client");
+    // the optimizer is told to diagnose the recurring weakness + emit JSON edits
+    expect(model.mock.calls[0][0]).toMatch(/recurring weakness/i);
+    expect(model.mock.calls[0][1]).toContain("CONFIRMED FAILURES");
+  });
+
+  it("enforces the edit budget", async () => {
+    const model = vi.fn(async (_s: string, _u: string) =>
+      '[{"op":"append","content":"a"},{"op":"append","content":"b"},{"op":"append","content":"c"}]');
+    const p = await proposeSkillEdit(body, failures, { model, editBudget: 1 });
+    expect(p.edits).toHaveLength(1);
+    expect(p.editedBody).toContain("\na");
+    expect(p.editedBody).not.toContain("\nb");
+  });
+
+  it("is a no-op when the model fails or proposes nothing", async () => {
+    expect((await proposeSkillEdit(body, failures, { model: vi.fn(async () => { throw new Error("x"); }) })).changed).toBe(false);
+    expect((await proposeSkillEdit(body, failures, { model: vi.fn(async (_s: string, _u: string) => "no edits") })).changed).toBe(false);
+  });
+});
diff --git a/tests/shared/skill-publisher.test.ts b/tests/shared/skill-publisher.test.ts
new file mode 100644
index 00000000..62916504
--- /dev/null
+++ b/tests/shared/skill-publisher.test.ts
@@ -0,0 +1,62 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { splitFrontmatter, bumpVersion, publishSkillEdit } from "../../src/skillify/skill-publisher.js";
+
+describe("splitFrontmatter", () => {
+  it("splits frontmatter from body", () => {
+    const { frontmatter, body } = splitFrontmatter("---\nname: x\nversion: 3\n---\n## Body\nhi");
+    expect(frontmatter).toBe("---\nname: x\nversion: 3\n---\n");
+    expect(body).toBe("## Body\nhi");
+  });
+  it("handles a doc with no frontmatter", () => {
+    expect(splitFrontmatter("just body")).toEqual({ frontmatter: "", body: "just body" });
+  });
+});
+
+describe("bumpVersion", () => {
+  it("increments an existing version", () => {
+    const r = bumpVersion("---\nname: x\nversion: 4\n---\n");
+    expect(r.oldVersion).toBe(4);
+    expect(r.newVersion).toBe(5);
+    expect(r.frontmatter).toContain("version: 5");
+  });
+  it("inserts version 2 when absent (original treated as 1)", () => {
+    const r = bumpVersion("---\nname: x\n---\n");
+    expect(r).toMatchObject({ oldVersion: 1, newVersion: 2 });
+    expect(r.frontmatter).toMatch(/version: 2\n---\n$/);
+  });
+  it("creates a frontmatter block when the doc has none", () => {
+    const r = bumpVersion("");
+    expect(r).toMatchObject({ oldVersion: 1, newVersion: 2 });
+    expect(r.frontmatter).toBe("---\nversion: 2\n---\n");
+  });
+});
+
+describe("publishSkillEdit", () => {
+  let root: string;
+  beforeEach(() => { root = fs.mkdtempSync(path.join(os.tmpdir(), "pub-")); });
+  afterEach(() => { fs.rmSync(root, { recursive: true, force: true }); });
+
+  it("writes the bumped body and backs up the original", () => {
+    const dir = path.join(root, "posthog--kamo");
+    fs.mkdirSync(dir);
+    fs.writeFileSync(path.join(dir, "SKILL.md"), "---\nname: posthog\nauthor: kamo\nversion: 2\n---\n## Rules\n1. mock the client\n");
+
+    const res = publishSkillEdit(root, "posthog", "kamo", "## Rules\n1. NEVER mock — assert on the real client");
+
+    expect(res).toMatchObject({ oldVersion: 2, newVersion: 3 });
+    const written = fs.readFileSync(res.path, "utf8");
+    expect(written).toContain("version: 3");
+    expect(written).toContain("NEVER mock — assert on the real client");
+    expect(written).not.toContain("1. mock the client\n");
+    // backup preserves the prior version verbatim
+    expect(fs.readFileSync(res.backupPath, "utf8")).toContain("version: 2");
+    expect(fs.readFileSync(res.backupPath, "utf8")).toContain("1. mock the client");
+  });
+
+  it("throws when the skill isn't installed (caller decides what to do)", () => {
+    expect(() => publishSkillEdit(root, "missing", "x", "body")).toThrow();
+  });
+});
diff --git a/tests/shared/skillopt-engine.test.ts b/tests/shared/skillopt-engine.test.ts
new file mode 100644
index 00000000..b06fda27
--- /dev/null
+++ b/tests/shared/skillopt-engine.test.ts
@@ -0,0 +1,128 @@
+import { describe, it, expect, vi } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { runSkillOptCycle, readSkillBodyViaManifest, type ProposalRecord } from "../../src/skillify/skillopt-engine.js";
+import type { PulledManifest } from "../../src/skillify/manifest.js";
+
+const invRow = (skill: string, sid: string) => ({
+  message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), session_id: sid, timestamp: sid },
+  last_update_date: sid,
+});
+const transcript = (skill: string, sid: string, pushback: boolean) => [
+  { message: { type: "user_message", content: "do it" } },
+  { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill }), timestamp: sid } },
+  { message: { type: "assistant_message", content: "done" } },
+  { message: { type: "user_message", content: pushback ? "no that's wrong, it mocks the client" : "thanks, perfect" } },
+];
+
+/** nBad skills, each 10 invocations with 5 pushback → each deficient. */
+function world(nBad: number) {
+  const invs: Array<Record<string, unknown>> = [];
+  const transcripts = new Map<string, Array<Record<string, unknown>>>();
+  for (let b = 0; b < nBad; b++) {
+    for (let i = 0; i < 10; i++) {
+      const sid = `b${b}s${i}`;
+      invs.push(invRow(`bad${b}--auth`, sid));
+      transcripts.set(sid, transcript(`bad${b}--auth`, sid, i < 5));
+    }
+  }
+  const query = vi.fn(async (sql: string) => {
+    if (sql.includes('"Skill"') && sql.includes("ORDER BY last_update_date")) return invs;
+    const m = sql.match(/\/sessions\/%([^%]+)%/);
+    return m ? (transcripts.get(m[1]) ?? []) : [];
+  });
+  return query;
+}
+
+const judge = () => vi.fn(async (_s: string, _u: string) => '{"success":0,"confidence":0.9,"reason":"mocks the client"}');
+const proposerModel = () => vi.fn(async (_s: string, _u: string) => '[{"op":"append","content":"Always verify via the PostHog API."}]');
+
+describe("runSkillOptCycle", () => {
+  it("fires when >=5 skills are deficient and writes a proposal per editable skill", async () => {
+    const written: ProposalRecord[] = [];
+    const res = await runSkillOptCycle({
+      query: world(6), sessionsTable: "sessions", now: "2026-06-05T00:00:00Z",
+      readSkillBody: () => "## Rules\n1. mock the client",
+      writeProposal: (r) => written.push(r),
+      detector: { judge: judge() }, proposer: { model: proposerModel() },
+    });
+    expect(res.fired).toBe(true);
+    expect(res.deficientCount).toBe(6);
+    expect(written).toHaveLength(6);
+    expect(written[0].candidateBody).toContain("Always verify via the PostHog API.");
+    expect(written[0]).toMatchObject({ invocations: 10, confirmedFailures: 5 });
+  });
+
+  it("does NOT fire below the threshold (no proposals, even though detection ran)", async () => {
+    const writeProposal = vi.fn();
+    const res = await runSkillOptCycle({
+      query: world(4), sessionsTable: "sessions", now: "t",
+      readSkillBody: () => "## Rules", writeProposal,
+      detector: { judge: judge() }, proposer: { model: proposerModel() },
+    });
+    expect(res).toMatchObject({ fired: false, deficientCount: 4 });
+    expect(res.proposals).toHaveLength(0);
+    expect(writeProposal).not.toHaveBeenCalled();
+  });
+
+  it("skips a deficient skill that isn't installed locally (no body to edit)", async () => {
+    const written: ProposalRecord[] = [];
+    const res = await runSkillOptCycle({
+      query: world(6), sessionsTable: "sessions", now: "t",
+      readSkillBody: (name) => (name === "bad0" ? null : "## Rules\n1. mock the client"),
+      writeProposal: (r) => written.push(r),
+      detector: { judge: judge() }, proposer: { model: proposerModel() },
+    });
+    expect(res.fired).toBe(true);
+    expect(written).toHaveLength(5);                       // bad0 skipped
+    expect(written.some((w) => w.name === "bad0")).toBe(false);
+  });
+
+  it("dedups against meta memory: a skill whose edit was already proposed isn't re-written", async () => {
+    const written: ProposalRecord[] = [];
+    const recorded: string[] = [];
+    const res = await runSkillOptCycle({
+      query: world(6), sessionsTable: "sessions", now: "t",
+      readSkillBody: () => "## Rules\n1. mock the client",
+      writeProposal: (r) => written.push(r),
+      detector: { judge: judge() }, proposer: { model: proposerModel() },
+      meta: {
+        prior: () => ["append: earlier idea"],          // fed to the proposer as context
+        has: (name) => name === "bad0",                  // bad0 already tried → dedup
+        record: (name) => recorded.push(name),
+      },
+    });
+    expect(res.fired).toBe(true);
+    expect(written).toHaveLength(5);                     // bad0 deduped
+    expect(written.some((w) => w.name === "bad0")).toBe(false);
+    expect(recorded).not.toContain("bad0");              // not recorded again
+    expect(res.proposals.find((p) => p.name === "bad0")!.changed).toBe(false);
+  });
+
+  it("reads a project-pulled skill body via the manifest's installRoot (not the cwd)", () => {
+    const projRoot = fs.mkdtempSync(path.join(os.tmpdir(), "proj-"));
+    try {
+      fs.mkdirSync(path.join(projRoot, "x--a"), { recursive: true });
+      fs.writeFileSync(path.join(projRoot, "x--a", "SKILL.md"), "---\nname: x\nauthor: a\n---\n## Body\nproject body");
+      const manifest = {
+        version: 1,
+        entries: [{ dirName: "x--a", name: "x", author: "a", installRoot: projRoot, projectKey: "", remoteVersion: 1, install: "project", installedAtVersion: 1, pulledAt: "", symlinks: [] }],
+      } as unknown as PulledManifest;
+      expect(readSkillBodyViaManifest("x", "a", manifest, "/nonexistent-global")).toBe("## Body\nproject body");
+      // no manifest entry + no fallback body → null (not a silent wrong-skill edit)
+      expect(readSkillBodyViaManifest("y", "b", manifest, "/nonexistent-global")).toBeNull();
+    } finally {
+      fs.rmSync(projRoot, { recursive: true, force: true });
+    }
+  });
+
+  it("honors a custom fireThreshold", async () => {
+    const res = await runSkillOptCycle({
+      query: world(3), sessionsTable: "sessions", now: "t", fireThreshold: 3,
+      readSkillBody: () => "## Rules", writeProposal: vi.fn(),
+      detector: { judge: judge() }, proposer: { model: proposerModel() },
+    });
+    expect(res.fired).toBe(true);
+  });
+});
diff --git a/tests/shared/skillopt-meta.test.ts b/tests/shared/skillopt-meta.test.ts
new file mode 100644
index 00000000..f92d8516
--- /dev/null
+++ b/tests/shared/skillopt-meta.test.ts
@@ -0,0 +1,54 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import {
+  fingerprintEdits, alreadyProposed, priorEditSummaries, metaEntryFor, loadMeta, appendMeta,
+} from "../../src/skillify/skillopt-meta.js";
+import type { Edit } from "../../src/skillify/skill-edits.js";
+
+const edits: Edit[] = [{ op: "append", content: "always flush" }, { op: "replace", target: "mock", content: "do not mock" }];
+
+describe("fingerprintEdits", () => {
+  it("is order-independent (same set → same fingerprint)", () => {
+    expect(fingerprintEdits(edits)).toBe(fingerprintEdits([...edits].reverse()));
+  });
+  it("differs for different content", () => {
+    expect(fingerprintEdits(edits)).not.toBe(fingerprintEdits([{ op: "append", content: "other" }]));
+  });
+});
+
+describe("alreadyProposed / priorEditSummaries", () => {
+  const meta = [metaEntryFor("posthog", "kamo", edits, "t1"), metaEntryFor("other", "x", [{ op: "append", content: "z" }], "t2")];
+  it("matches a prior proposal by skill + fingerprint", () => {
+    expect(alreadyProposed(meta, "posthog", "kamo", [...edits].reverse())).toBe(true);
+    expect(alreadyProposed(meta, "posthog", "kamo", [{ op: "append", content: "new" }])).toBe(false);
+    expect(alreadyProposed(meta, "nope", "kamo", edits)).toBe(false); // different skill
+  });
+  it("surfaces prior edit summaries only for the given skill", () => {
+    const prior = priorEditSummaries(meta, "posthog", "kamo");
+    expect(prior.length).toBe(2);
+    expect(prior.join(" ")).toContain("append");
+    expect(priorEditSummaries(meta, "posthog", "kamo").join(" ")).not.toContain('append: z'); // other skill's edit
+  });
+});
+
+describe("loadMeta / appendMeta", () => {
+  let file: string;
+  beforeEach(() => { file = path.join(fs.mkdtempSync(path.join(os.tmpdir(), "meta-")), "meta.jsonl"); });
+  afterEach(() => { fs.rmSync(path.dirname(file), { recursive: true, force: true }); });
+
+  it("round-trips entries and skips malformed lines", () => {
+    appendMeta(file, metaEntryFor("a", "b", edits, "t1"));
+    appendMeta(file, metaEntryFor("c", "d", [{ op: "append", content: "x" }], "t2"));
+    fs.appendFileSync(file, "{ not json }\n\n");
+    const loaded = loadMeta(file);
+    expect(loaded).toHaveLength(2);
+    expect(loaded[0].skill).toBe("a--b");
+    expect(loaded[0].status).toBe("proposed");
+  });
+
+  it("returns [] for a missing file", () => {
+    expect(loadMeta(path.join(os.tmpdir(), "does-not-exist-xyz.jsonl"))).toEqual([]);
+  });
+});
diff --git a/tests/shared/skillopt-trigger.test.ts b/tests/shared/skillopt-trigger.test.ts
index 7049e5da..ff3c3751 100644
--- a/tests/shared/skillopt-trigger.test.ts
+++ b/tests/shared/skillopt-trigger.test.ts
@@ -34,6 +34,7 @@ describe("runWeeklySkillOpt (auto-fire decision)", () => {
       tryLock: () => true,          // injected so the unit test touches no real lock file
       releaseLock: release,
       reload: () => over.state ?? {}, // in-lock re-read mirrors the pre-lock state by default
+      canFire: () => true,           // injected so the unit test doesn't read real credentials
       ...over,
     });
     return { res, saved, spawn, release };
@@ -47,6 +48,13 @@ describe("runWeeklySkillOpt (auto-fire decision)", () => {
     expect(release).toHaveBeenCalledTimes(1); // lock released after firing
   });
 
+  it("does NOT fire or stamp when logged out (preserves the throttle for a fresh login)", () => {
+    const { res, saved, spawn } = harness({ state: {}, canFire: () => false });
+    expect(res).toEqual({ fired: false, reason: "no-creds" });
+    expect(spawn).not.toHaveBeenCalled();
+    expect(saved).toEqual([]); // no stamp → next session after login fires
+  });
+
   it("does NOT fire when another process holds the weekly lock (cross-process race)", () => {
     const { res, saved, spawn } = harness({ state: {}, tryLock: () => false });
     expect(res).toEqual({ fired: false, reason: "locked" });
diff --git a/tests/shared/skills-active.test.ts b/tests/shared/skills-active.test.ts
deleted file mode 100644
index a7d1c295..00000000
--- a/tests/shared/skills-active.test.ts
+++ /dev/null
@@ -1,211 +0,0 @@
-import { describe, it, expect, beforeEach, afterEach } from "vitest";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-import {
-  listActiveOrgSkills,
-  sessionBucket,
-  buildSkillsActiveInsert,
-  buildSkillsActivePath,
-  skillRootsForCwd,
-  defaultSkillsRoot,
-} from "../../src/skillify/skills-active.js";
-import type { PulledManifest } from "../../src/skillify/manifest.js";
-
-/** Build a pull manifest from `(dirName, name, author)` triples (fills the rest with defaults). */
-function manifestOf(...rows: Array<{ dirName: string; name: string; author: string }>): PulledManifest {
-  return {
-    version: 1,
-    entries: rows.map(r => ({
-      dirName: r.dirName,
-      name: r.name,
-      author: r.author,
-      projectKey: "pk",
-      remoteVersion: 1,
-      install: "global" as const,
-      installRoot: "/install/root",
-      pulledAt: "2026-01-01T00:00:00.000Z",
-      symlinks: [],
-    })),
-  };
-}
-
-describe("listActiveOrgSkills", () => {
-  let root: string;
-  beforeEach(() => {
-    root = fs.mkdtempSync(path.join(os.tmpdir(), "skills-active-"));
-  });
-  afterEach(() => {
-    fs.rmSync(root, { recursive: true, force: true });
-  });
-
-  it("returns only manifest-recorded (pull-managed) dirs; excludes local-only + files", () => {
-    fs.mkdirSync(path.join(root, "posthog-event-smoke-testing--kamo.aghbalyan"));
-    fs.mkdirSync(path.join(root, "pg-deeplake-test-crash-debugging--sasun"));
-    fs.mkdirSync(path.join(root, "deploy--blue-green"));        // local-only `--` dir, NOT pulled — excluded
-    fs.mkdirSync(path.join(root, "plan-confirm-then-execute")); // bare local — excluded
-    fs.writeFileSync(path.join(root, "notes--x.txt"), "x");     // file, not dir — excluded
-    const manifest = manifestOf(
-      { dirName: "posthog-event-smoke-testing--kamo.aghbalyan", name: "posthog-event-smoke-testing", author: "kamo.aghbalyan" },
-      { dirName: "pg-deeplake-test-crash-debugging--sasun", name: "pg-deeplake-test-crash-debugging", author: "sasun" },
-    );
-
-    const got = listActiveOrgSkills([root], manifest);
-    expect(got).toEqual([
-      { name: "pg-deeplake-test-crash-debugging", author: "sasun", version: 1 },
-      { name: "posthog-event-smoke-testing", author: "kamo.aghbalyan", version: 1 },
-    ]); // sorted by name; exactly the 2 manifest-recorded skills; version defaults to 1 (no SKILL.md)
-    expect(got).toHaveLength(2); // local `deploy--blue-green` + bare + file all dropped
-  });
-
-  it("excludes a local-only dir whose name contains `--` when the manifest is empty (no false positive)", () => {
-    fs.mkdirSync(path.join(root, "deploy--blue-green")); // org-shaped name, but never pulled
-    expect(listActiveOrgSkills([root], manifestOf())).toEqual([]);
-  });
-
-  it("takes name/author from the manifest, not a dirname split (multi-`--` dir stays correct)", () => {
-    fs.mkdirSync(path.join(root, "some--weird--dirname"));
-    const manifest = manifestOf({ dirName: "some--weird--dirname", name: "some-skill", author: "first-last" });
-    expect(listActiveOrgSkills([root], manifest)).toEqual([{ name: "some-skill", author: "first-last", version: 1 }]);
-  });
-
-  it("returns [] for a missing skills root (never throws)", () => {
-    expect(listActiveOrgSkills([path.join(root, "does-not-exist")], manifestOf())).toEqual([]);
-  });
-
-  it("scans project + global roots and dedups a skill present in both (P2: --to project)", () => {
-    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), "skills-active-proj-"));
-    try {
-      fs.mkdirSync(path.join(root, "a-skill--alice"));         // global-pulled
-      fs.mkdirSync(path.join(projectRoot, "a-skill--alice"));  // also project-pulled (dup)
-      fs.mkdirSync(path.join(projectRoot, "b-skill--bob"));    // project-only org skill
-      fs.mkdirSync(path.join(projectRoot, "local-only--x"));   // not in manifest → excluded
-      const manifest = manifestOf(
-        { dirName: "a-skill--alice", name: "a-skill", author: "alice" },
-        { dirName: "b-skill--bob", name: "b-skill", author: "bob" },
-      );
-      const got = listActiveOrgSkills([root, projectRoot], manifest);
-      expect(got).toEqual([
-        { name: "a-skill", author: "alice", version: 1 }, // counted once despite two roots
-        { name: "b-skill", author: "bob", version: 1 },   // picked up from the project root
-      ]);
-      expect(got).toHaveLength(2);
-    } finally {
-      fs.rmSync(projectRoot, { recursive: true, force: true });
-    }
-  });
-
-  it("reads the skill version from the installed SKILL.md frontmatter (enables v1-vs-v2)", () => {
-    fs.mkdirSync(path.join(root, "evolving-skill--sasun"));
-    fs.writeFileSync(
-      path.join(root, "evolving-skill--sasun", "SKILL.md"),
-      "---\nname: evolving-skill\nversion: 5\n---\nbody",
-    );
-    const manifest = manifestOf({ dirName: "evolving-skill--sasun", name: "evolving-skill", author: "sasun" });
-    expect(listActiveOrgSkills([root], manifest)).toEqual([{ name: "evolving-skill", author: "sasun", version: 5 }]);
-  });
-});
-
-describe("skillRootsForCwd", () => {
-  it("returns only the global root when no cwd is given", () => {
-    expect(skillRootsForCwd()).toEqual([defaultSkillsRoot()]);
-  });
-  it("adds the project-scoped <cwd>/.claude/skills root when cwd is given", () => {
-    expect(skillRootsForCwd("/home/u/proj")).toEqual([
-      defaultSkillsRoot(),
-      path.join("/home/u/proj", ".claude", "skills"),
-    ]);
-  });
-});
-
-describe("sessionBucket", () => {
-  it("is deterministic for the same session id", () => {
-    expect(sessionBucket("abc-123")).toBe(sessionBucket("abc-123"));
-  });
-  it("stays within [0, buckets)", () => {
-    for (const id of ["a", "b", "c", "xyz", "1874a6b2"]) {
-      const b = sessionBucket(id, 2);
-      expect(b).toBeGreaterThanOrEqual(0);
-      expect(b).toBeLessThan(2);
-    }
-  });
-  it("assigns both buckets across many ids (not constant)", () => {
-    const seen = new Set<number>();
-    for (let i = 0; i < 200; i++) seen.add(sessionBucket(`session-${i}`));
-    expect(seen).toEqual(new Set([0, 1])); // both arms populated → real randomization
-  });
-});
-
-describe("buildSkillsActivePath", () => {
-  const config = { userName: "kamo", orgName: "activeloop", workspaceId: "default" };
-
-  it("namespaces under /skills_active/, NOT /sessions/ (so summary readers exclude it)", () => {
-    const p = buildSkillsActivePath(config, "S1");
-    expect(p.startsWith("/skills_active/")).toBe(true);
-    expect(p.startsWith("/sessions/")).toBe(false);
-    // The exact filter the summary / raw-transcript readers use must NOT match this path.
-    expect(p.includes("/sessions/")).toBe(false);
-  });
-
-  it("embeds the full {user, org, workspace, session} tuple", () => {
-    expect(buildSkillsActivePath(config, "S1")).toBe(
-      "/skills_active/kamo/kamo_activeloop_default_S1.json",
-    );
-  });
-
-  it("falls back to `default` workspace when workspaceId is absent", () => {
-    // covers the `?? \"default\"` branch (mirrors buildSessionPath)
-    const p = buildSkillsActivePath(
-      { userName: "kamo", orgName: "activeloop", workspaceId: undefined as unknown as string },
-      "S1",
-    );
-    expect(p).toBe("/skills_active/kamo/kamo_activeloop_default_S1.json");
-  });
-});
-
-describe("buildSkillsActiveInsert", () => {
-  const base = {
-    sessionsTable: "sessions",
-    sessionPath: "/sessions/kamo/kamo_activeloop_hivemind_S1.jsonl",
-    filename: "kamo_activeloop_hivemind_S1.jsonl",
-    userName: "kamo",
-    projectName: "hivemind",
-    pluginVersion: "0.7.99",
-    sessionId: "S1",
-    cwd: "/home/kamo/proj",
-    skills: [{ name: "pg-deeplake-test-crash-debugging", author: "sasun", version: 3 }],
-    bucket: 1,
-    ts: "2026-06-03T00:00:00.000Z",
-  };
-
-  it("emits exactly ONE insert into the sessions table (no second mutation)", () => {
-    const sql = buildSkillsActiveInsert(base);
-    expect((sql.match(/INSERT INTO/g) ?? []).length).toBe(1);
-    expect((sql.match(/UPDATE /g) ?? []).length).toBe(0);
-    expect(sql).toContain('INSERT INTO "sessions"');
-  });
-
-  it("writes a skills_active message with the skills, count, and bucket", () => {
-    const sql = buildSkillsActiveInsert(base);
-    const m = sql.match(/'(\{.*\})'::jsonb/s);
-    expect(m).toBeTruthy();
-    const entry = JSON.parse(m![1]);
-    expect(entry.type).toBe("skills_active");
-    expect(entry.session_id).toBe("S1");
-    expect(entry.skills).toEqual([{ name: "pg-deeplake-test-crash-debugging", author: "sasun", version: 3 }]);
-    expect(entry.skills_count).toBe(1);
-    expect(entry.ab_bucket).toBe(1);
-  });
-
-  it("leaves message_embedding NULL (no daemon round-trip at SessionStart)", () => {
-    const sql = buildSkillsActiveInsert(base);
-    expect(sql).toMatch(/::jsonb,\s*NULL,/);
-  });
-
-  it("does NOT masquerade as a captured turn type", () => {
-    const sql = buildSkillsActiveInsert(base);
-    expect(sql).not.toContain('"type":"user_message"');
-    expect(sql).not.toContain('"type":"tool_call"');
-    expect(sql).not.toContain('"type":"assistant_message"');
-  });
-});
diff --git a/tests/shared/success-judge.test.ts b/tests/shared/success-judge.test.ts
new file mode 100644
index 00000000..7386c65b
--- /dev/null
+++ b/tests/shared/success-judge.test.ts
@@ -0,0 +1,44 @@
+import { describe, it, expect, vi } from "vitest";
+import { parseVerdict, judgeSuccess } from "../../src/skillify/success-judge.js";
+
+describe("parseVerdict", () => {
+  it("parses a clean JSON verdict", () => {
+    expect(parseVerdict('{"success":0,"confidence":0.9,"reason":"mocks the client"}'))
+      .toEqual({ success: 0, confidence: 0.9, reason: "mocks the client" });
+  });
+  it("tolerates ```json fences and surrounding prose", () => {
+    const raw = "Here is my judgment:\n```json\n{\"success\": 1, \"confidence\": 0.8, \"reason\": \"ok\"}\n```\nDone.";
+    expect(parseVerdict(raw)).toEqual({ success: 1, confidence: 0.8, reason: "ok" });
+  });
+  it("treats success false/\"0\" as failure and clamps confidence", () => {
+    expect(parseVerdict('{"success":false,"confidence":2,"reason":"x"}')).toMatchObject({ success: 0, confidence: 1 });
+    expect(parseVerdict('{"success":"0","confidence":-1,"reason":"x"}')).toMatchObject({ success: 0, confidence: 0 });
+  });
+  it("is conservative (success=1) on unparseable output", () => {
+    expect(parseVerdict("the model rambled with no json")).toMatchObject({ success: 1, confidence: 0 });
+  });
+});
+
+describe("judgeSuccess", () => {
+  it("returns the judged verdict from the injected model", async () => {
+    const model = vi.fn(async (_system: string, _user: string) => '{"success":0,"confidence":0.95,"reason":"no flush, event never sends"}');
+    const v = await judgeSuccess("USER: do X\n\nASSISTANT: mocked it", { model });
+    expect(v.success).toBe(0);
+    expect(model).toHaveBeenCalledOnce();
+    // the judge must be told to ignore mood (anti-sycophancy) + asked for JSON
+    expect(model.mock.calls[0][0]).toMatch(/praised-but-wrong|Ignore whether the user/i);
+  });
+
+  it("is conservative (success=1) when the model call throws — a flaky judge can't manufacture failure", async () => {
+    const v = await judgeSuccess("USER: x\n\nASSISTANT: y", { model: vi.fn(async () => { throw new Error("boom"); }) });
+    expect(v.success).toBe(1);
+    expect(v.reason).toContain("judge failed");
+  });
+
+  it("short-circuits an empty window without calling the model", async () => {
+    const model = vi.fn(async () => "{}");
+    const v = await judgeSuccess("   ", { model });
+    expect(v.success).toBe(1);
+    expect(model).not.toHaveBeenCalled();
+  });
+});