imonroe · imonroe · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/app/mcp_server.py b/app/mcp_server.py
@@ -3,6 +3,7 @@
 from app import memory as memory_mod
 from app.auth import build_verifier
 from app.config import get_settings
+from app.ranking import rerank_by_recency
 
 
 def build_mcp() -> FastMCP:
@@ -30,12 +31,18 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None
         return memory.add(content, **kwargs)
 
     @mcp.tool
-    def search_memories(query: str, limit: int = 10) -> dict:
+    def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict:
         """Search long-term memory by semantic similarity.
 
         Searches the single shared memory store for the user, across all agents.
+
+        recency_weight (0.0-1.0) optionally biases results toward more recently
+        created or updated memories. Leave it at 0 for pure semantic relevance;
+        raise it (e.g. 0.3) when the user asks what is *latest* or *current* and
+        recency matters more than an exact topical match.
         """
-        return memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
+        results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
+        return rerank_by_recency(results, recency_weight)
 
     @mcp.tool
     def list_memories() -> dict:

diff --git a/app/ranking.py b/app/ranking.py
@@ -0,0 +1,97 @@
+"""Optional post-search re-ranking that blends semantic similarity with recency.
+
+mem0/Qdrant rank purely by vector similarity. For a personal memory store the
+most *recent* relevant fact is often the one you actually want, so callers can
+opt into a recency boost that re-orders an already-similar result set without
+changing which memories are matched. With ``recency_weight=0`` (the default for
+both REST and MCP) this module is a no-op and the original order is preserved.
+"""
+
+import math
+from datetime import UTC, datetime
+
+# Keys a mem0 result may carry a timestamp under, in most-preferred-first order.
+_TIMESTAMP_KEYS = ("updated_at", "created_at")
+
+
+def _parse_timestamp(value: object) -> datetime | None:
+    """Parse an ISO-8601 string into a timezone-aware ``datetime``.
+
+    Returns ``None`` for non-string input, blank strings, and text that
+    ``datetime.fromisoformat`` rejects. A trailing ``Z`` is read as UTC, and a
+    timestamp that carries no offset is assumed to be UTC.
+    """
+    if not isinstance(value, str):
+        return None
+    cleaned = value.strip()
+    if not cleaned:
+        return None
+    # Spell a trailing 'Z' (Zulu) out as an explicit UTC offset before parsing,
+    # so the UTC intent is visible regardless of what fromisoformat accepts.
+    iso_text = f"{cleaned[:-1]}+00:00" if cleaned.endswith("Z") else cleaned
+    try:
+        parsed = datetime.fromisoformat(iso_text)
+    except ValueError:
+        return None
+    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=UTC)
+
+
+def _item_timestamp(item: dict) -> datetime | None:
+    """Return the first parseable timestamp found on a result item, else ``None``.
+
+    Keys are tried in ``_TIMESTAMP_KEYS`` order; for each key the top-level
+    field is checked before the same key inside a dict-valued ``"metadata"``.
+    """
+    nested = item.get("metadata")
+    sources = (item, nested) if isinstance(nested, dict) else (item,)
+    for field in _TIMESTAMP_KEYS:
+        for source in sources:
+            found = _parse_timestamp(source.get(field))
+            if found is not None:
+                return found
+    return None
+
+
+def rerank_by_recency(
+    results: dict,
+    recency_weight: float,
+    half_life_days: float = 30.0,
+    now: datetime | None = None,
+) -> dict:
+    """Re-order ``results['results']`` by a blend of similarity and recency.
+
+    Each item is scored as ``(1 - w) * similarity + w * recency``:
+
+    * ``w`` is ``recency_weight`` capped at 1. A weight that is zero, negative
+      or NaN leaves ``results`` untouched (pure similarity order).
+    * ``similarity`` is the item's ``score`` min-max normalized across the
+      returned set. A missing, non-numeric or non-finite ``score`` counts as 0.
+      When every score is equal, similarity is 1.0 for all items so recency
+      alone decides the order.
+    * ``recency`` halves every ``half_life_days`` of age and is 1.0 for an item
+      timestamped at or after ``now``. Items without a usable timestamp get a
+      recency of 0, so they are never boosted.
+
+    ``now`` defaults to the current UTC time; a naive ``now`` is taken to be
+    UTC, matching how naive item timestamps are interpreted.
+
+    The same dict is returned; its items list is reordered in place and every
+    item (including the only item of a single-result set) gains a
+    ``rerank_score`` for transparency. Anything that isn't the expected
+    ``{"results": [ {...}, ... ]}`` shape is passed through unchanged.
+    """
+    # `not (w > 0)` instead of `w <= 0`: NaN fails every comparison, so this
+    # form also rejects a NaN weight that would otherwise poison every score.
+    if not isinstance(results, dict) or not recency_weight > 0:
+        return results
+    items = results.get("results")
+    if not isinstance(items, list) or not items:
+        return results
+    if not all(isinstance(it, dict) for it in items):
+        return results
+
+    weight = min(recency_weight, 1.0)
+    if now is None:
+        now = datetime.now(UTC)
+    elif now.tzinfo is None:
+        # Item timestamps are always timezone-aware; subtracting a naive `now`
+        # from them would raise TypeError.
+        now = now.replace(tzinfo=UTC)
+    # Floor the half-life so the division below is safe; the conditional form
+    # also maps a NaN half-life onto the floor instead of propagating it.
+    half_life = half_life_days if half_life_days > 1e-9 else 1e-9
+
+    def _raw_score(item: dict) -> float:
+        # Tolerate malformed scores rather than failing the whole search.
+        try:
+            value = float(item.get("score") or 0.0)
+        except (TypeError, ValueError):
+            return 0.0
+        return value if math.isfinite(value) else 0.0
+
+    scores = [_raw_score(it) for it in items]
+    lo, hi = min(scores), max(scores)
+    span = hi - lo
+
+    def _similarity(idx: int) -> float:
+        # With no spread between scores, similarity carries no signal; treat all
+        # results as equally similar so recency becomes the sole tiebreaker.
+        return 1.0 if span <= 0 else (scores[idx] - lo) / span
+
+    def _recency(item: dict) -> float:
+        ts = _item_timestamp(item)
+        if ts is None:
+            return 0.0
+        # Future-dated items are clamped to age 0 so they cannot score above 1.
+        age_days = max((now - ts).total_seconds() / 86400.0, 0.0)
+        return math.exp(-math.log(2) * age_days / half_life)
+
+    for idx, item in enumerate(items):
+        item["rerank_score"] = (1 - weight) * _similarity(idx) + weight * _recency(item)
+
+    # list.sort is stable, so items with equal blended scores keep their
+    # original (similarity) order.
+    items.sort(key=lambda it: it.get("rerank_score", 0.0), reverse=True)
+    return results
diff --git a/app/rest.py b/app/rest.py
@@ -7,6 +7,7 @@
 from app import memory as memory_mod
 from app.auth import require_bearer
 from app.config import get_settings
+from app.ranking import rerank_by_recency
 
 router = APIRouter(dependencies=[Depends(require_bearer)])
 
@@ -31,6 +32,10 @@ class SearchRequest(BaseModel):
     agent_id: str | None = None
     run_id: str | None = None
     limit: int = Field(default=10, ge=1, le=100)
+    # Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior),
+    # 1 = order almost entirely by how recently a memory was created/updated.
+    recency_weight: float = Field(default=0.0, ge=0.0, le=1.0)
+    recency_half_life_days: float = Field(default=30.0, gt=0.0)
 
 
 class UpdateMemoryRequest(BaseModel):
@@ -65,7 +70,8 @@ def add_memory(req: AddMemoryRequest) -> dict:
 def search_memories(req: SearchRequest) -> dict:
     memory = memory_mod.get_memory()
     filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id)
-    return memory.search(query=req.query, filters=filters, top_k=req.limit)
+    results = memory.search(query=req.query, filters=filters, top_k=req.limit)
+    return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days)
 
 
 @router.get("/memories")

diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md
@@ -64,6 +64,9 @@ app/
                     user_id defaulted to MEM0_DEFAULT_USER_ID.
   rest.py           REST router under /api/v1 (mounted with prefix in main.py). Pydantic request
                     models, _scope_kwargs() for user/agent/run scoping, check_qdrant() helper.
+  ranking.py        rerank_by_recency(): optional, opt-in post-search re-ranking that blends
+                    mem0's similarity score with a recency decay. No-op when recency_weight=0,
+                    so default REST/MCP search behavior is unchanged.
   auth.py           require_bearer (REST dependency), CompositeVerifier and StaticTokenVerifier
                     wiring, build_verifier() selecting Phase 1 vs Phase 2.
   oauth.py          Phase 2 OAuth 2.1 + PKCE + DCR endpoints, JWT issuance, JWKS, AS/PR metadata.

diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md
@@ -379,6 +379,14 @@ Tools: search_memories, add_memory, list_memories, get_memory, update_memory, de
 If your agent has no instruction file but does take a system prompt, the same four numbered rules
 work verbatim there.
 
+### Companion prompt packs
+
+Beyond the baseline rules above, [`docs/prompts/`](./prompts/README.md) collects reusable,
+copy-paste prompt packs for specific recurring tasks — [auto-capturing a session
+summary](./prompts/auto-capture.md), [research synthesis](./prompts/research-synthesis.md), and
+[meeting synthesis](./prompts/meeting-synthesis.md). They're documentation only (no server changes)
+and drive the same six tools.
+
 ## REST API reference
 
 All endpoints live under `/api/v1` and require `Authorization: Bearer <MEM0_API_KEY>`. Request and
@@ -413,6 +421,22 @@ curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
   -d '{"query": "where do we host things?"}'
 ```
 
+**Recency boost (optional).** By default results are ordered purely by semantic
+similarity. When you care more about what's *latest* than what's the closest
+topical match, add `recency_weight` (0.0–1.0): `0` keeps the default order, `1`
+orders almost entirely by how recently each memory was created or updated. The
+half-life of the decay (default 30 days) is tunable via `recency_half_life_days`.
+
+```bash
+curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
+  -H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \
+  -d '{"query": "current deploy target", "recency_weight": 0.4}'
+```
+
+When `recency_weight > 0`, each returned result carries a `rerank_score` showing
+the blended similarity-plus-recency value it was sorted by. The MCP
+`search_memories` tool accepts the same `recency_weight` argument; it does not
+expose `recency_half_life_days` and always uses the 30-day default.
+
 ### List memories — `GET /api/v1/memories`
 
 Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50).

diff --git a/docs/prompts/README.md b/docs/prompts/README.md
@@ -0,0 +1,38 @@
+# Companion prompts (skill packs)
+
+These are **copy-paste prompt packs** that make the memory server more useful for
+recurring kinds of work. They aren't code and they don't change the server — they
+ride on top of the six MCP tools (`search_memories`, `add_memory`,
+`list_memories`, `get_memory`, `update_memory`, `delete_memory`) that any
+connected client exposes.
+
+The idea is adapted from the [OB1 / Open Brain](https://github.com/NateBJones-Projects/OB1)
+project's "skill packs," reworked for this server's single-user model and tool names.
+
+For the baseline "always recall first, save durable facts, don't duplicate, don't
+store secrets" instruction block, see
+[Prompting agents to use memory](../USER_GUIDE.md#prompting-agents-to-use-memory)
+in the User Guide. The packs here are the next layer up: structured workflows for
+specific tasks.
+
+## How to use a pack
+
+1. Open the pack and copy its prompt block.
+2. Paste it into your client where instructions live — a Claude Project, a
+   `CLAUDE.md`, ChatGPT custom instructions, an `AGENTS.md`, or just inline at the
+   start of a chat.
+3. Adjust the tool names to match how your client surfaces them (Claude Code, for
+   example, namespaces them like `mcp__mem0-remote__search_memories`).
+
+All packs assume a single shared memory store: searches and lists span everything,
+and `agent_id` is only a write-time provenance tag (it never partitions reads).
+
+## Available packs
+
+| Pack | Use it when you want to… |
+|---|---|
+| [Auto-capture](./auto-capture.md) | Have the agent save a structured summary of a work session at the end, so the next session starts with context. |
+| [Research synthesis](./research-synthesis.md) | Turn sources or notes into findings with confidence levels and open questions, persisted to memory. |
+| [Meeting synthesis](./meeting-synthesis.md) | Turn meeting notes or a transcript into decisions, action items, and risks, persisted to memory. |
+
+Contributions of new packs are welcome — copy the shape of an existing file.
diff --git a/docs/prompts/auto-capture.md b/docs/prompts/auto-capture.md
@@ -0,0 +1,41 @@
+# Auto-capture: save a session summary
+
+**Goal:** when a working session wraps up, have the agent distill what happened
+into a few durable memories so the *next* session can recall it. This closes the
+loop that makes a personal memory store actually compound over time.
+
+Adapted from the OB1 "Auto-Capture" skill for this server's six tools.
+
+## Prompt block
+
+Paste into a `CLAUDE.md`, a Project's custom instructions, an `AGENTS.md`, or
+inline at the start of a session.
+
+```markdown
+## Session auto-capture
+
+When a working session is wrapping up — I say we're done, the task is finished,
+or the conversation is clearly ending — capture what's worth remembering:
+
+1. First call `search_memories` for the session's main topic to see what's
+   already stored, so you update instead of duplicating.
+2. Then save the durable takeaways with `add_memory`, one clear fact per call:
+   - decisions made and the reasoning behind them
+   - conventions, preferences, or constraints I stated
+   - unfinished work and the agreed next step
+   - useful facts discovered (paths, commands, config, names) likely to recur
+   Tag each with `agent_id` = "auto-capture" for provenance.
+3. If a saved memory is now wrong, `update_memory` it rather than adding a new one.
+4. Skip transient chatter, one-off details, and anything sensitive
+   (passwords, API keys, private personal data).
+5. Briefly list what you saved so I can correct it.
+```
+
+## Notes
+
+- Keep each memory a single, self-contained statement — "We deploy to CapRover
+  on push to `main`" beats a paragraph. Short facts retrieve and update cleanly.
+- The `agent_id` tag (`auto-capture`) is write-only provenance; it does **not**
+  scope future searches, so these memories surface for every client.
+- Pair this with the baseline "recall first" instruction so the next session
+  opens by reading what the previous one saved.
diff --git a/docs/prompts/meeting-synthesis.md b/docs/prompts/meeting-synthesis.md
@@ -0,0 +1,40 @@
+# Meeting synthesis
+
+**Goal:** turn raw meeting notes or a transcript into the things that actually
+matter afterward — decisions, action items, and risks — and persist the durable
+ones to memory so they resurface in later sessions.
+
+Adapted from the OB1 "Meeting Synthesis" skill for this server's six tools.
+
+## Prompt block
+
+```markdown
+## Meeting synthesis
+
+When I paste meeting notes or a transcript:
+
+1. Recall first: `search_memories` for the project or people involved to load
+   prior context and avoid contradicting earlier decisions.
+2. Produce four sections:
+   - **Decisions** — what was decided, each as a one-line statement.
+   - **Action items** — task, owner, and due date if stated.
+   - **Risks / open issues** — anything flagged as a concern or unresolved.
+   - **Deliverables** — concrete outputs expected downstream.
+3. Persist the durable items with `add_memory`, one per call — every decision,
+   and any action item or risk that outlives the meeting. Tag
+   `agent_id` = "meeting" and put a date in `metadata`, e.g.
+   `{"date": "2026-06-04"}`.
+4. If a decision supersedes one already in memory, `update_memory` the old one
+   instead of adding a conflicting record.
+5. Skip small talk and scheduling noise. Never store sensitive personal data.
+6. Output the four sections and note which items you saved.
+```
+
+## Notes
+
+- Decisions are the highest-value thing to persist; action items often live in a
+  tracker already, so save the ones you'll want recalled in conversation, not the
+  whole list.
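+- The `date` in metadata records when the meeting happened. Note that
+  [recency-boosted search](../USER_GUIDE.md#search-memories--post-apiv1memoriessearch)
+  ranks by each memory's `updated_at` / `created_at` timestamp, not by this
+  `date` field, so save memories soon after the meeting — then raise
+  `recency_weight` when you ask "what did we most recently decide?"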
diff --git a/docs/prompts/research-synthesis.md b/docs/prompts/research-synthesis.md
@@ -0,0 +1,42 @@
+# Research synthesis
+
+**Goal:** turn raw sources — articles, docs, notes, search results — into
+structured findings with explicit confidence levels and open questions, and
+persist the durable conclusions to memory so later work can build on them.
+
+Adapted from the OB1 "Research Synthesis" skill for this server's six tools.
+
+## Prompt block
+
+```markdown
+## Research synthesis
+
+When I give you sources to synthesize (links, pasted text, notes, or a topic to
+research):
+
+1. Recall first: `search_memories` for the topic to surface anything I've already
+   concluded, so you extend rather than repeat prior work.
+2. Read the sources and produce:
+   - **Findings** — the key claims, each as a one-line statement.
+   - **Confidence** — mark each finding high / medium / low based on source
+     quality and agreement.
+   - **Contradictions** — where sources disagree, say so explicitly.
+   - **Open questions** — what's still unresolved and worth following up.
+3. Persist the durable conclusions with `add_memory`, one finding per call.
+   Put the confidence and a short source reference in `metadata`, e.g.
+   `{"confidence": "high", "source": "<url or title>"}`, and tag
+   `agent_id` = "research".
+4. Do not save low-confidence guesses as if they were facts; either omit them or
+   store them clearly labeled `"confidence": "low"`.
+5. Never store anything sensitive.
+6. End with the findings list and note which ones you saved.
+```
+
+## Notes
+
+- The `metadata` fields (`confidence`, `source`) are free-form today and are
+  stored on the memory. A future server change (see the provenance/review issue in
+  the backlog) may make them first-class and filterable; saving them now means the
+  data is already there when that lands.
+- Keep findings atomic — one claim per memory — so confidence and sources stay
+  attached to the right statement and updates are surgical.
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -55,6 +55,22 @@ async def test_read_tools_do_not_expose_agent_id(mcp):
         assert "agent_id" not in props, name
 
 
+async def test_search_exposes_recency_weight(mcp):
+    """The ``search_memories`` tool schema lists ``recency_weight`` as an input."""
+    async with Client(mcp) as client:
+        listed = await client.list_tools()
+    by_name = {tool.name: tool for tool in listed}
+    schema = by_name["search_memories"].inputSchema or {}
+    assert "recency_weight" in schema.get("properties", {})
+
+
+async def test_search_with_recency_weight_invokes_mem(mcp, mem):
+    """A recency-weighted search queries mem0 as usual and re-orders the hits."""
+    # Two hits so the re-ranker actually runs: an empty result list makes
+    # rerank_by_recency return early. The far-future timestamp is clamped to
+    # age 0 (recency 1.0), so the outcome does not depend on the wall clock.
+    hits = {
+        "results": [
+            {"id": "old", "score": 0.9, "created_at": "2000-01-01T00:00:00Z"},
+            {"id": "new", "score": 0.8, "created_at": "2999-01-01T00:00:00Z"},
+        ]
+    }
+    mem.search.return_value = hits
+    async with Client(mcp) as client:
+        await client.call_tool("search_memories", {"query": "x", "recency_weight": 0.9})
+    _, kwargs = mem.search.call_args
+    assert kwargs["filters"] == {"user_id": "default-user"}
+    assert kwargs["top_k"] == 10
+    # rerank_by_recency sorts the returned list in place, so the mock's own
+    # payload reflects the final order: the newer, less similar hit wins.
+    assert [hit["id"] for hit in hits["results"]] == ["new", "old"]
+
+
 async def test_list_memories_tool(mcp, mem):
     mem.get_all.return_value = {"results": []}
     async with Client(mcp) as client: