diff --git a/.gitignore b/.gitignore index 98b4f82..f073766 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,8 @@ flow_doctor.db # Fly deployment config — user-specific, must not be committed. # Template lives at fly.toml.example; copy + edit for your own deploy. fly.toml + +# Capture-attention calibration output — contains operator vault content +# (real memory titles + snippets), must not be committed. Schema template +# lives at tests/fixtures/capture_attention_pairs.example.json. +tests/fixtures/capture_attention_pairs.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 4add7ee..888d9a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +## [0.7.0rc4] - 2026-05-24 + +### Capture-attention Phase A — activation infrastructure + +- **New `MNEMON_CAPTURE_ATTENTION_ENABLED` env-var override** on the + Phase A feature flag. Mirrors the standing-tier pattern + (`MNEMON_STANDING_TIER_ENABLED`) — operators can flip activation on + Fly via `flyctl secrets set` without a code change + redeploy, and + the flag is re-read from the process environment on every save (Fly restarts Machines to apply a changed secret). Accepts + `1`/`true`/`yes`/`on` (truthy) or `0`/`false`/`no`/`off` (falsy); + unset / unrecognized falls back to `config.CAPTURE_ATTENTION_ENABLED` + (still default `False` through soak). New + `store._capture_attention_enabled()` helper called at request time + from `Store.save` and `cli attention-status`. 5 new tests. +- **`mnemon attention-status` now reports the effective flag value** + with the env-var override applied — a Fly secret flip shows up here + immediately instead of misleading the operator with the unchanged + config default. + +### Calibration fixture privacy hardening + +- **`tests/fixtures/capture_attention_pairs.json` is now gitignored.** + PR #153 shipped this path tracked with a placeholder schema — + intended as a seed, but every operator calibration run overwrites + it with real vault titles + snippets (personal context, in-flight + work, etc.) 
that must not land in a public-repo commit. The + placeholder schema moves to + `tests/fixtures/capture_attention_pairs.example.json` (tracked) so + future contributors still see the format; the operator output stays + local-only. + +### Calibration script fixes (`scripts/calibrate_capture_threshold.py`) + +- **`VecStore.get(vec_id) -> np.ndarray | None`** added — mirrors the + `has` / `delete` single-id shape; returns a defensive copy. The + calibration script's `vs.get(vec_id)` call site failed on first + invocation because the method did not exist. 3 new tests (returns + vector, missing → None, defensive-copy invariant). +- **Near-neighbor pair sampling** replaces uniform-random. The previous + random sample across a 2510-memory vault produced pair cosines + clustered at 0.1-0.4 (clearly-different topics) — operator verdicts + carried no information about whether the threshold cut should be + 0.80 or 0.85. New sampler picks anchors, takes each one's top + non-self neighbor above cosine 0.55 (well below the lowest + calibration threshold so edge-negatives survive), and sorts + descending so the operator tags high-confidence near-dupes first. + Verified against the 2026-05-24 prod snapshot: 20-pair sample spans + cosine 0.751-0.999, entirely in the calibration-relevant range. + Calibration on that snapshot recommended + `CAPTURE_ATTENTION_THRESHOLD = 0.85` — matches the existing default, + so no config change needed. 
+ ## [0.7.0rc3] - 2026-05-22 ### Test coverage diff --git a/pyproject.toml b/pyproject.toml index 5042ef3..2073afa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "mnemon-memory" -version = "0.7.0rc3" +version = "0.7.0rc4" description = "Universal long-term memory layer for AI agents via MCP" readme = "README.md" license = "MIT" diff --git a/scripts/calibrate_capture_threshold.py b/scripts/calibrate_capture_threshold.py index c28c354..d4e98e6 100755 --- a/scripts/calibrate_capture_threshold.py +++ b/scripts/calibrate_capture_threshold.py @@ -43,7 +43,21 @@ def _load_pairs(db_path: Path, n: int) -> list[dict]: - """Sample N random memory pairs + their pairwise cosine similarity.""" + """Sample N near-neighbor memory pairs from the threshold decision region. + + The naive uniform-random sample over a 2510-memory vault produces pairs + whose cosines cluster at 0.1–0.4 (clearly-different topics) — operator + verdicts on those pairs carry no information about whether the + CAPTURE_ATTENTION_THRESHOLD cut should be 0.80 or 0.85. Every pair + a calibration operator tags should sit in the decision region (cosine + near the candidate thresholds). + + Strategy: pick a random anchor, take its top non-self neighbor via + vector search, and accept the pair if cosine ≥ ``MIN_PAIR_COSINE``. + Repeat until ``n`` pairs are collected or the search budget is + exhausted. Bias toward higher-cosine pairs is the desired calibration + behavior — the threshold lives in the high-cosine tail. 
+ """ import numpy as np src = REPO_ROOT / "src" @@ -51,6 +65,10 @@ def _load_pairs(db_path: Path, n: int) -> list[dict]: sys.path.insert(0, str(src)) from mnemon.vecstore import VecStore + MIN_PAIR_COSINE = 0.55 # well below the lowest calibration threshold (0.70) + # — preserves edge-negatives the threshold should NOT flag + MAX_ATTEMPTS = max(n * 20, 200) # generous budget for the rejection loop + vec_path = str(db_path).replace(".sqlite", ".vec") if not Path(vec_path + ".npz").exists(): sys.exit( @@ -72,31 +90,54 @@ def _load_pairs(db_path: Path, n: int) -> list[dict]: ORDER BY id""" ).fetchall() - # Build hash → embedding map (seq=0 only — that's the full-doc fragment) - embs: dict[str, "np.ndarray"] = {} + # Build hash → (id, title, embedding) map (seq=0 only — full-doc fragment). + by_hash: dict[str, dict] = {} for r in rows: vec_id = f"{r['hash']}_0" vec = vs.get(vec_id) if vec is not None: - embs[r["hash"]] = vec + by_hash[r["hash"]] = {"id": r["id"], "title": r["title"], "vec": vec} - eligible = [r for r in rows if r["hash"] in embs] - if len(eligible) < 2 * n: + if len(by_hash) < n + 5: sys.exit( - f"ERROR: only {len(eligible)} eligible memories in vault " - f"(need ≥{2 * n} for {n} pairs)" + f"ERROR: only {len(by_hash)} eligible memories in vault " + f"(need at least {n + 5} for {n} near-neighbor pairs)" ) random.seed(42) - chosen = random.sample(eligible, 2 * n) - pairs = [] - for i in range(0, 2 * n, 2): - a, b = chosen[i], chosen[i + 1] - va, vb = embs[a["hash"]], embs[b["hash"]] - cos = float(np.dot(va, vb) / (np.linalg.norm(va) * np.linalg.norm(vb))) - # Pull content snippets for review - ac = db.execute("SELECT doc FROM content WHERE hash = ?", (a["hash"],)).fetchone() - bc = db.execute("SELECT doc FROM content WHERE hash = ?", (b["hash"],)).fetchone() + candidate_hashes = list(by_hash.keys()) + pairs: list[dict] = [] + seen_pair_keys: set[tuple[str, str]] = set() + attempts = 0 + + while len(pairs) < n and attempts < MAX_ATTEMPTS: + attempts += 1 
+ anchor_hash = random.choice(candidate_hashes) + anchor = by_hash[anchor_hash] + # k=3 → first hit is the anchor itself (cosine 1.0); take the next + # distinct hash. Occasionally a vault has duplicate-content fragments + # so we filter by hash, not just rank. + results = vs.search(anchor["vec"], k=3) + neighbor = None + for res in results: + res_hash = res["id"].rsplit("_", 1)[0] + if res_hash == anchor_hash or res_hash not in by_hash: + continue + neighbor = (res_hash, float(res["similarity"])) + break + if neighbor is None: + continue + nhash, cos = neighbor + if cos < MIN_PAIR_COSINE: + continue + pair_key = tuple(sorted([anchor_hash, nhash])) + if pair_key in seen_pair_keys: + continue + seen_pair_keys.add(pair_key) + + a, b = by_hash[anchor_hash], by_hash[nhash] + ac = db.execute("SELECT doc FROM content WHERE hash = ?", (anchor_hash,)).fetchone() + bc = db.execute("SELECT doc FROM content WHERE hash = ?", (nhash,)).fetchone() pairs.append({ "id_a": a["id"], "id_b": b["id"], "title_a": a["title"], "title_b": b["title"], @@ -104,7 +145,17 @@ def _load_pairs(db_path: Path, n: int) -> list[dict]: "snippet_b": (bc["doc"] if bc else "")[:200], "cosine": cos, }) + db.close() + if len(pairs) < n: + print( + f"WARNING: only {len(pairs)} pairs found above cosine " + f"{MIN_PAIR_COSINE} in {attempts} attempts — vault may lack " + f"semantic clusters. Proceeding with what we have." + ) + # Sort by cosine descending so the operator tags high-confidence + # near-dupes first (catches the calibration intuition early). 
+ pairs.sort(key=lambda p: -p["cosine"]) return pairs diff --git a/src/mnemon/__init__.py b/src/mnemon/__init__.py index b789769..1242577 100644 --- a/src/mnemon/__init__.py +++ b/src/mnemon/__init__.py @@ -1,3 +1,3 @@ """mnemon — Universal long-term memory layer for AI agents via MCP.""" -__version__ = "0.7.0rc3" +__version__ = "0.7.0rc4" diff --git a/src/mnemon/cli.py b/src/mnemon/cli.py index 186e1ae..a7b1b8a 100644 --- a/src/mnemon/cli.py +++ b/src/mnemon/cli.py @@ -413,10 +413,10 @@ def _print_attention_status(store) -> None: 2. precision floor (operator-judged via --review, not auto-checked) """ from .config import ( - CAPTURE_ATTENTION_ENABLED, CAPTURE_ATTENTION_THRESHOLD, CAPTURE_ATTENTION_SOAK_BOOST_RATE_MAX, ) + from .store import _capture_attention_enabled # Boost rate over 7d (boosts = restates relations created) boosts_7d = store.db.execute( @@ -431,8 +431,10 @@ def _print_attention_status(store) -> None: rate = (boosts_7d / saves_7d) if saves_7d else 0.0 rate_ok = "✓" if rate <= CAPTURE_ATTENTION_SOAK_BOOST_RATE_MAX else "⚠" + # Effective flag value reflects MNEMON_CAPTURE_ATTENTION_ENABLED env-var + # override as resolved from this process's environment; a Fly secret flip shows up here once the Machine has restarted with the new value. 
print(f"Capture attention — soak status") - print(f" Flag enabled : {CAPTURE_ATTENTION_ENABLED}") + print(f" Flag enabled : {_capture_attention_enabled()}") print(f" Threshold (cosine) : {CAPTURE_ATTENTION_THRESHOLD}") print(f" Boost-rate 7d : {boosts_7d} / {saves_7d} = " f"{rate:.3f} {rate_ok} (ceiling {CAPTURE_ATTENTION_SOAK_BOOST_RATE_MAX})") diff --git a/src/mnemon/store.py b/src/mnemon/store.py index 81bec97..54443d0 100644 --- a/src/mnemon/store.py +++ b/src/mnemon/store.py @@ -8,6 +8,7 @@ from __future__ import annotations import hashlib +import os import sqlite3 import time import uuid @@ -19,7 +20,6 @@ from .config import ( CAPTURE_ATTENTION_BOOST, - CAPTURE_ATTENTION_ENABLED, CAPTURE_ATTENTION_MIN_HITS, CAPTURE_ATTENTION_REQUIRE_DISTINCT_SESSIONS, CAPTURE_ATTENTION_THRESHOLD, @@ -39,6 +39,29 @@ from .vecstore import VecStore +def _capture_attention_enabled() -> bool: + """Resolve the capture-attention feature flag (env-var override). + + Truth sources, in order: + 1. ``MNEMON_CAPTURE_ATTENTION_ENABLED`` env var (operator override) — + lets the operator flip activation on Fly via ``flyctl secrets + set`` without a code change + redeploy. + 2. ``config.CAPTURE_ATTENTION_ENABLED`` (default-off through soak). + + Mirrors the standing-tier helper in + ``hooks/context_surfacing.py:_standing_tier_enabled``. Called at + request time (in ``Store.save``), so the flag is re-read from ``os.environ`` on every save rather than frozen at import time; + a Fly secret flip applies once the Machine has restarted with the new value. + """ + env = os.environ.get("MNEMON_CAPTURE_ATTENTION_ENABLED", "").strip().lower() + if env in ("1", "true", "yes", "on"): + return True + if env in ("0", "false", "no", "off"): + return False + from .config import CAPTURE_ATTENTION_ENABLED + return CAPTURE_ATTENTION_ENABLED + + class CaptureAttentionUnavailableError(RuntimeError): """Raised when the capture-attention path can't complete its check. @@ -459,7 +482,7 @@ def save( # secondary observability hung off a primary path (the save # itself) that survives independently. 
Mirrors the existing # embed_document() WARN pattern in server.py:memory_save. - if CAPTURE_ATTENTION_ENABLED and correction_of is None: + if _capture_attention_enabled() and correction_of is None: try: self.apply_capture_attention( new_doc_id=doc_id, content=content, diff --git a/src/mnemon/vecstore.py b/src/mnemon/vecstore.py index 6ca3010..893b81e 100644 --- a/src/mnemon/vecstore.py +++ b/src/mnemon/vecstore.py @@ -86,6 +86,17 @@ def size(self) -> int: def has(self, vec_id: str) -> bool: return vec_id in self._ids + def get(self, vec_id: str) -> np.ndarray | None: + """Return the vector for ``vec_id``, or ``None`` if not present. + + Returns a defensive copy — callers can mutate freely without + affecting the in-memory store (matches ``export_all``'s contract). + """ + if vec_id not in self._ids or self._vectors is None: + return None + idx = self._ids.index(vec_id) + return self._vectors[idx].copy() + def delete(self, vec_id: str) -> bool: if vec_id not in self._ids: return False diff --git a/tests/fixtures/capture_attention_pairs.example.json b/tests/fixtures/capture_attention_pairs.example.json new file mode 100644 index 0000000..f6bbce8 --- /dev/null +++ b/tests/fixtures/capture_attention_pairs.example.json @@ -0,0 +1,13 @@ +[ + { + "_comment": "Schema template for capture-attention threshold calibration. The real fixture at tests/fixtures/capture_attention_pairs.json is gitignored because it contains real operator vault snippets (titles + content) — never commit operator output. Generate yours by running `python scripts/calibrate_capture_threshold.py --db path/to/snapshot.sqlite` against a fresh snapshot. 
Schema: each entry needs cosine + verdict; recommend() tolerates 'unclear' verdicts.", + "id_a": 0, + "id_b": 0, + "title_a": "placeholder", + "title_b": "placeholder", + "snippet_a": "synthetic seed — operator-tagged output is gitignored", + "snippet_b": "synthetic seed — operator-tagged output is gitignored", + "cosine": 0.0, + "verdict": "unclear" + } +] diff --git a/tests/fixtures/capture_attention_pairs.json b/tests/fixtures/capture_attention_pairs.json deleted file mode 100644 index af4fcf3..0000000 --- a/tests/fixtures/capture_attention_pairs.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - { - "_comment": "Seed fixture for capture-attention threshold calibration. Replace with operator-tagged pairs from the real vault via `python scripts/calibrate_capture_threshold.py`. The schema is locked: each entry needs cosine + verdict, and the recommend() function tolerates 'unclear' verdicts.", - "id_a": 0, - "id_b": 0, - "title_a": "placeholder", - "title_b": "placeholder", - "snippet_a": "synthetic seed — replace with calibration output", - "snippet_b": "synthetic seed — replace with calibration output", - "cosine": 0.0, - "verdict": "unclear" - } -] diff --git a/tests/test_capture_attention.py b/tests/test_capture_attention.py index 78d9b61..f7c6a26 100644 --- a/tests/test_capture_attention.py +++ b/tests/test_capture_attention.py @@ -21,7 +21,11 @@ import pytest from mnemon import config -from mnemon.store import CaptureAttentionUnavailableError, Store +from mnemon.store import ( + CaptureAttentionUnavailableError, + Store, + _capture_attention_enabled, +) # ── Fixtures ────────────────────────────────────────────────────── @@ -44,12 +48,13 @@ def store(): @pytest.fixture def attention_on(monkeypatch): - """Flip the feature flag on for the test scope.""" + """Flip the feature flag on for the test scope. 
+ + The helper ``_capture_attention_enabled`` re-reads ``config`` at + every call, so a single monkeypatch on the config constant covers + every call site (Store.save and CLI status alike). + """ monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", True) - # store.py read the constant via module import — patch the - # in-module reference too - import mnemon.store - monkeypatch.setattr(mnemon.store, "CAPTURE_ATTENTION_ENABLED", True) def _fake_embed_constant(_text: str) -> np.ndarray: @@ -89,6 +94,48 @@ def _set_created_at(store, doc_id: int, days_ago: int) -> None: store.db.commit() +# ── Feature flag resolution (env-var override) ──────────────────── + + +class TestFeatureFlagResolution: + """``MNEMON_CAPTURE_ATTENTION_ENABLED`` env var must take precedence + over the config default — mirrors ``MNEMON_STANDING_TIER_ENABLED`` + pattern so operators can flip activation on Fly via ``flyctl secrets + set`` without a code change + redeploy. + """ + + def test_defaults_to_config_when_env_unset(self, monkeypatch): + monkeypatch.delenv("MNEMON_CAPTURE_ATTENTION_ENABLED", raising=False) + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", False) + assert _capture_attention_enabled() is False + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", True) + assert _capture_attention_enabled() is True + + @pytest.mark.parametrize("truthy", ["1", "true", "True", "TRUE", "yes", "on"]) + def test_env_truthy_overrides_config_false(self, monkeypatch, truthy): + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", False) + monkeypatch.setenv("MNEMON_CAPTURE_ATTENTION_ENABLED", truthy) + assert _capture_attention_enabled() is True + + @pytest.mark.parametrize("falsy", ["0", "false", "False", "FALSE", "no", "off"]) + def test_env_falsy_overrides_config_true(self, monkeypatch, falsy): + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", True) + monkeypatch.setenv("MNEMON_CAPTURE_ATTENTION_ENABLED", falsy) + assert _capture_attention_enabled() is False + + def 
test_env_unrecognized_falls_back_to_config(self, monkeypatch): + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", True) + monkeypatch.setenv("MNEMON_CAPTURE_ATTENTION_ENABLED", "maybe") + assert _capture_attention_enabled() is True + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", False) + assert _capture_attention_enabled() is False + + def test_env_whitespace_stripped(self, monkeypatch): + monkeypatch.setattr(config, "CAPTURE_ATTENTION_ENABLED", False) + monkeypatch.setenv("MNEMON_CAPTURE_ATTENTION_ENABLED", " true ") + assert _capture_attention_enabled() is True + + # ── Schema migration ────────────────────────────────────────────── diff --git a/tests/test_vecstore.py b/tests/test_vecstore.py index d249127..e6c5cdf 100644 --- a/tests/test_vecstore.py +++ b/tests/test_vecstore.py @@ -39,6 +39,24 @@ def test_delete(self, vecstore): assert vecstore.size() == 0 assert not vecstore.delete("a_0") + def test_get_returns_vector(self, vecstore): + vecstore.set("a_0", np.array([1, 0, 0, 0], dtype=np.float32)) + vec = vecstore.get("a_0") + assert vec is not None + np.testing.assert_array_equal(vec, np.array([1, 0, 0, 0], dtype=np.float32)) + + def test_get_missing_returns_none(self, vecstore): + assert vecstore.get("nonexistent") is None + vecstore.set("a_0", np.array([1, 0, 0, 0], dtype=np.float32)) + assert vecstore.get("b_0") is None + + def test_get_returns_defensive_copy(self, vecstore): + vecstore.set("a_0", np.array([1, 0, 0, 0], dtype=np.float32)) + vec = vecstore.get("a_0") + vec[0] = 99.0 + vec2 = vecstore.get("a_0") + assert vec2[0] == 1.0 + def test_search_cosine(self, vecstore): vecstore.set("a_0", np.array([1, 0, 0, 0], dtype=np.float32)) vecstore.set("b_0", np.array([0, 1, 0, 0], dtype=np.float32))