Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/mnemon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ class MemoryType(str, Enum):
RRF_K = 60
MMR_THRESHOLD = 0.6 # bigram Jaccard ≥ this → candidate is "too similar" to a selected result
MMR_DEMOTION_FACTOR = 0.5 # composite-score multiplier applied to MMR-demoted results
# Layer 4 (stored-injection defense): a flat composite-score multiplier
# applied to HOOK_SOURCE_CLIENTS results at rank time. Stacks on top of
# the HOOK_SOURCE_CONFIDENCE_CEILING save-time cap — the cap only moves
# the 0.25-weighted confidence term (≤0.025 composite delta), too weak
# on its own to stop an auto-captured transcript fragment dominating
# unprompted recall (the 2026-05-18 Desktop incident). With 0.85, a hook
# capture needs a ~18% higher pre-demotion composite (1 / 0.85 ≈ 1.18)
# to tie a user-authored memory. Explicit memory_get(id) bypasses composite
# scoring entirely, so direct lookups are unaffected.
PROVENANCE_DEMOTION_FACTOR = 0.85
COMPOSITE_WEIGHTS = (0.5, 0.25, 0.25) # (relevance, recency, confidence)
RECENCY_HALF_LIFE_DAYS = 30
PIN_BOOST = 0.3
Expand Down
19 changes: 18 additions & 1 deletion src/mnemon/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@

from .config import (
COMPOSITE_WEIGHTS,
HOOK_SOURCE_CLIENTS,
MMR_DEMOTION_FACTOR,
MMR_THRESHOLD,
PROVENANCE_DEMOTION_FACTOR,
QUERY_EXPANSION_MAX_TOKENS,
RECENCY_HALF_LIFE_DAYS,
RRF_K,
Expand All @@ -39,6 +41,9 @@ class ScoredResult:
source: str
composite_score: float = 0.0
recency_score: float = 0.0
# Provenance of the underlying save, preserved for observability and
# so downstream consumers can see the Layer 4 demotion was applied.
source_client: str | None = None
# Raw cosine similarity from the vector store, preserved through RRF
# fusion so clients can do true-similarity comparisons (e.g., dedup)
# without trying to reverse-engineer it from composite_score. None
Expand All @@ -57,10 +62,20 @@ def compute_recency(created_at: str) -> float:


def composite_score(result: SearchResult) -> ScoredResult:
"""Apply composite scoring: relevance + recency + confidence."""
"""Apply composite scoring: relevance + recency + confidence.

Layer 4: results whose ``source_client`` is in
:data:`HOOK_SOURCE_CLIENTS` (best-effort auto-captured transcripts,
not deliberate user assertions) have their composite multiplied by
:data:`PROVENANCE_DEMOTION_FACTOR` so they cannot outrank an
equal-relevance user-authored memory in unprompted recall. This is
rank-only — explicit ``memory_get(id)`` does not pass through here.
"""
w_rel, w_rec, w_conf = COMPOSITE_WEIGHTS
recency = compute_recency(result.created_at)
composite = w_rel * result.score + w_rec * recency + w_conf * result.confidence
if result.source_client in HOOK_SOURCE_CLIENTS:
composite *= PROVENANCE_DEMOTION_FACTOR

return ScoredResult(
doc_id=result.doc_id,
Expand All @@ -74,6 +89,7 @@ def composite_score(result: SearchResult) -> ScoredResult:
source=result.source,
composite_score=composite,
recency_score=recency,
source_client=result.source_client,
)


Expand Down Expand Up @@ -152,6 +168,7 @@ def rrf_fuse(*result_sets: list[SearchResult]) -> list[SearchResult]:
created_at=r.created_at,
score=0,
source="fused",
source_client=r.source_client,
),
}

Expand Down
10 changes: 9 additions & 1 deletion src/mnemon/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ class SearchResult:
created_at: str
score: float
source: str = "bm25"
# Provenance — the save's source_client. Carried through search +
# RRF fusion so composite scoring can apply the Layer 4 provenance
# demotion (auto-captured transcripts must not outrank deliberate
# user assertions at equal relevance). None for legacy/unknown.
source_client: str | None = None


@dataclass
Expand Down Expand Up @@ -435,6 +440,7 @@ def search_bm25(self, query: str, limit: int = 20) -> list[SearchResult]:
d.memory_type,
d.confidence,
d.created_at,
d.source_client,
rank * -1 AS bm25_score
FROM documents_fts fts
JOIN documents d ON d.id = fts.rowid
Expand All @@ -457,6 +463,7 @@ def search_bm25(self, query: str, limit: int = 20) -> list[SearchResult]:
created_at=r["created_at"],
score=r["bm25_score"],
source="bm25",
source_client=r["source_client"],
)
for r in rows
]
Expand Down Expand Up @@ -485,7 +492,7 @@ def search_vector(self, embedding: np.ndarray, limit: int = 20) -> list[SearchRe
content_hash = vr["id"].split("_")[0]
row = self.db.execute(
"""SELECT d.id AS doc_id, d.title, c.doc AS content, d.content_type,
d.memory_type, d.confidence, d.created_at
d.memory_type, d.confidence, d.created_at, d.source_client
FROM documents d
JOIN content c ON d.hash = c.hash
WHERE d.hash = ? AND d.invalidated_at IS NULL
Expand All @@ -505,6 +512,7 @@ def search_vector(self, embedding: np.ndarray, limit: int = 20) -> list[SearchRe
created_at=row["created_at"],
score=vr["similarity"],
source="vector",
source_client=row["source_client"],
))

if len(results) >= limit:
Expand Down
65 changes: 65 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest

from mnemon.config import PROVENANCE_DEMOTION_FACTOR
from mnemon.search import (
ScoredResult,
_bigrams,
Expand Down Expand Up @@ -55,6 +56,70 @@ def test_composite_combines_signals(self):
assert scored.recency_score > 0


class TestProvenanceDemotion:
    """Layer 4 provenance demotion at rank time.

    A result whose ``source_client`` marks it as a hook capture must score
    ``PROVENANCE_DEMOTION_FACTOR`` times what an otherwise identical
    result scores, and therefore sort below it.
    """

    def _result(self, source_client):
        """Build a fixed SearchResult that varies only in ``source_client``."""
        fields = dict(
            doc_id=1,
            title="Test",
            content="Hello",
            content_type="note",
            memory_type="semantic",
            confidence=0.5,
            created_at="2026-04-09T00:00:00",
            score=1.0,
            source_client=source_client,
        )
        return SearchResult(**fields)

    def test_hook_source_is_demoted_by_exactly_the_factor(self):
        user_score = composite_score(self._result(None)).composite_score
        hook_score = composite_score(
            self._result("claude-code-hook")
        ).composite_score

        # The hook score is the user score scaled by the factor...
        assert hook_score == pytest.approx(
            user_score * PROVENANCE_DEMOTION_FACTOR
        )
        # ...and is strictly smaller.
        assert hook_score < user_score

    def test_non_hook_sources_not_demoted(self):
        # compute_recency reads datetime.now(), so successive calls drift by
        # a tiny epsilon; use an approximate comparison rather than ==.
        reference = composite_score(self._result(None)).composite_score
        non_hook_clients = (None, "mnemon-mirror", "claude-desktop", "cli")
        for client in non_hook_clients:
            scored = composite_score(self._result(client))
            assert scored.composite_score == pytest.approx(
                reference, rel=1e-6
            )

    def test_source_client_carried_into_scored_result(self):
        scored = composite_score(self._result("claude-code-hook"))
        assert scored.source_client == "claude-code-hook"

    def test_hook_capture_ranks_below_equal_user_memory(self):
        # Same relevance, recency and confidence on both sides: provenance
        # alone decides the order, and the user memory has to come first.
        user = composite_score(self._result(None))
        hook = composite_score(self._result("claude-code-hook"))
        ranked = sorted(
            [hook, user], key=lambda r: r.composite_score, reverse=True
        )
        assert ranked[0] is user

    def test_provenance_survives_rrf_fusion(self):
        hook = SearchResult(
            doc_id=7,
            title="H",
            content="h",
            content_type="note",
            memory_type="semantic",
            confidence=0.5,
            created_at="2026-04-09",
            score=1.0,
            source_client="claude-code-hook",
        )
        fused = rrf_fuse([hook])
        top = fused[0]
        assert top.source_client == "claude-code-hook"

        # The demotion must also fire on the fused result: compare it with
        # a copy of the same result that has its provenance cleared.
        without_provenance = SearchResult(
            **{**top.__dict__, "source_client": None}
        )
        demoted = composite_score(top).composite_score
        undemoted = composite_score(without_provenance).composite_score
        assert demoted == pytest.approx(
            undemoted * PROVENANCE_DEMOTION_FACTOR
        )


class TestMMR:
def test_bigrams(self):
bg = _bigrams("hello world foo")
Expand Down
17 changes: 17 additions & 0 deletions tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,23 @@ def test_search_excludes_forgotten(self, store):
results = store.search_bm25("searchable")
assert len(results) == 0

def test_search_carries_source_client_provenance(self, store):
    """Check that ``source_client`` given to ``save`` is returned by ``search_bm25``.

    Layer 4 relies on this: composite scoring can only apply the
    provenance demotion if the search result still carries the value.
    """
    hook_client = "claude-code-hook"

    store.save(
        title="Hooked",
        content="provenance threaded content",
        source_client=hook_client,
    )
    store.save(
        title="Authored",
        content="provenance threaded user assertion",
    )

    by_title = {}
    for hit in store.search_bm25("provenance threaded"):
        by_title[hit.title] = hit

    assert by_title["Hooked"].source_client == hook_client
    # A save made without a source_client must not report hook provenance.
    assert by_title["Authored"].source_client != hook_client


class TestRelations:
def test_add_and_get_related(self, store):
Expand Down
Loading