Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions app/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from app import memory as memory_mod
from app.auth import build_verifier
from app.config import get_settings
from app.ranking import rerank_by_recency


def build_mcp() -> FastMCP:
Expand Down Expand Up @@ -30,12 +31,18 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None
return memory.add(content, **kwargs)

@mcp.tool
def search_memories(query: str, limit: int = 10) -> dict:
def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict:
"""Search long-term memory by semantic similarity.

Searches the single shared memory store for the user, across all agents.

recency_weight (0.0-1.0) optionally biases results toward more recently
created or updated memories. Leave it at 0 for pure semantic relevance;
raise it (e.g. 0.3) when the user asks what is *latest* or *current* and
recency matters more than an exact topical match.
"""
return memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
return rerank_by_recency(results, recency_weight)

@mcp.tool
def list_memories() -> dict:
Expand Down
97 changes: 97 additions & 0 deletions app/ranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Optional post-search re-ranking that blends semantic similarity with recency.

mem0/Qdrant rank purely by vector similarity. For a personal memory store the
most *recent* relevant fact is often the one you actually want, so callers can
opt into a recency boost that re-orders an already-similar result set without
changing which memories are matched. With ``recency_weight=0`` (the default for
both REST and MCP) this module is a no-op and the original order is preserved.
"""

import math
from datetime import UTC, datetime

# Keys a mem0 result may carry a timestamp under, in most-preferred-first order.
_TIMESTAMP_KEYS = ("updated_at", "created_at")


def _parse_timestamp(value: object) -> datetime | None:
if not isinstance(value, str) or not value.strip():
return None
text = value.strip()
# Normalize a trailing 'Z' (Zulu/UTC) to an explicit +00:00 offset. Python
# 3.11+ fromisoformat already accepts 'Z'; doing it here makes the UTC intent
# explicit and keeps parsing correct for any input form we hand it.
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
except ValueError:
return None
if dt.tzinfo is None:
dt = dt.replace(tzinfo=UTC)
return dt


def _item_timestamp(item: dict) -> datetime | None:
    """Return the first parseable timestamp found on a search result.

    Keys are tried in ``_TIMESTAMP_KEYS`` order. For each key the top-level
    field is checked first, then the same key inside ``item["metadata"]``
    when that value is a dict. Returns ``None`` if nothing parses.
    """
    nested = item.get("metadata")
    for field in _TIMESTAMP_KEYS:
        found = _parse_timestamp(item.get(field))
        if found is None and isinstance(nested, dict):
            found = _parse_timestamp(nested.get(field))
        if found is not None:
            return found
    return None


def rerank_by_recency(
results: dict,
recency_weight: float,
half_life_days: float = 30.0,
now: datetime | None = None,
) -> dict:
"""Re-order ``results['results']`` by a blend of similarity and recency.

``recency_weight`` in [0, 1]: 0 leaves the order untouched (pure similarity),
1 sorts almost entirely by recency. Similarity scores are min-max normalized
across the returned set so the two components are comparable. Items without a
usable timestamp contribute a recency score of 0, so they are never boosted.

The same dict is returned; its items list is reordered in place and each item
gains a ``rerank_score`` for transparency. Anything that isn't the expected
``{"results": [ {...}, ... ]}`` shape is passed through unchanged.
"""
if recency_weight <= 0 or not isinstance(results, dict):
return results
items = results.get("results")
if not isinstance(items, list) or len(items) < 2:
return results
if not all(isinstance(it, dict) for it in items):
return results

weight = min(max(recency_weight, 0.0), 1.0)
now = now or datetime.now(UTC)
half_life = max(half_life_days, 1e-9)

scores = [float(it.get("score") or 0.0) for it in items]
lo, hi = min(scores), max(scores)
span = hi - lo

def _similarity(idx: int) -> float:
# With no spread between scores, similarity carries no signal; treat all
# results as equally similar so recency becomes the sole tiebreaker.
return 1.0 if span <= 0 else (scores[idx] - lo) / span

def _recency(item: dict) -> float:
ts = _item_timestamp(item)
if ts is None:
return 0.0
age_days = max((now - ts).total_seconds() / 86400.0, 0.0)
return math.exp(-math.log(2) * age_days / half_life)

for idx, item in enumerate(items):
item["rerank_score"] = (1 - weight) * _similarity(idx) + weight * _recency(item)

items.sort(key=lambda it: it.get("rerank_score", 0.0), reverse=True)
return results
8 changes: 7 additions & 1 deletion app/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from app import memory as memory_mod
from app.auth import require_bearer
from app.config import get_settings
from app.ranking import rerank_by_recency

router = APIRouter(dependencies=[Depends(require_bearer)])

Expand All @@ -31,6 +32,10 @@ class SearchRequest(BaseModel):
agent_id: str | None = None
run_id: str | None = None
limit: int = Field(default=10, ge=1, le=100)
# Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior),
# 1 = order almost entirely by how recently a memory was created/updated.
recency_weight: float = Field(default=0.0, ge=0.0, le=1.0)
recency_half_life_days: float = Field(default=30.0, gt=0.0)


class UpdateMemoryRequest(BaseModel):
Expand Down Expand Up @@ -65,7 +70,8 @@ def add_memory(req: AddMemoryRequest) -> dict:
def search_memories(req: SearchRequest) -> dict:
memory = memory_mod.get_memory()
filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id)
return memory.search(query=req.query, filters=filters, top_k=req.limit)
results = memory.search(query=req.query, filters=filters, top_k=req.limit)
return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days)


@router.get("/memories")
Expand Down
3 changes: 3 additions & 0 deletions docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ app/
user_id defaulted to MEM0_DEFAULT_USER_ID.
rest.py REST router under /api/v1 (mounted with prefix in main.py). Pydantic request
models, _scope_kwargs() for user/agent/run scoping, check_qdrant() helper.
ranking.py rerank_by_recency(): optional, opt-in post-search re-ranking that blends
mem0's similarity score with a recency decay. No-op when recency_weight=0,
so default REST/MCP search behavior is unchanged.
auth.py require_bearer (REST dependency), CompositeVerifier and StaticTokenVerifier
wiring, build_verifier() selecting Phase 1 vs Phase 2.
oauth.py Phase 2 OAuth 2.1 + PKCE + DCR endpoints, JWT issuance, JWKS, AS/PR metadata.
Expand Down
24 changes: 24 additions & 0 deletions docs/USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,14 @@ Tools: search_memories, add_memory, list_memories, get_memory, update_memory, de
If your agent has no instruction file but does take a system prompt, the same four numbered rules
work verbatim there.

### Companion prompt packs

Beyond the baseline rules above, [`docs/prompts/`](./prompts/README.md) collects reusable,
copy-paste prompt packs for specific recurring tasks — [auto-capturing a session
summary](./prompts/auto-capture.md), [research synthesis](./prompts/research-synthesis.md), and
[meeting synthesis](./prompts/meeting-synthesis.md). They're documentation only (no server changes)
and drive the same six tools.

## REST API reference

All endpoints live under `/api/v1` and require `Authorization: Bearer <MEM0_API_KEY>`. Request and
Expand Down Expand Up @@ -413,6 +421,22 @@ curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
-d '{"query": "where do we host things?"}'
```

**Recency boost (optional).** By default results are ordered purely by semantic
similarity. When you care more about what's *latest* than what's the closest
topical match, add `recency_weight` (0.0–1.0): `0` keeps the default order, `1`
orders by how recently each memory was created or updated alone, and values in
between blend the two. Memories without a usable timestamp receive no boost. The
half-life of the decay (default 30 days) is tunable via `recency_half_life_days`.

```bash
curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
-H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \
-d '{"query": "current deploy target", "recency_weight": 0.4}'
```

When `recency_weight > 0`, each returned result carries a `rerank_score` showing
the blended similarity-plus-recency value it was sorted by. The MCP
`search_memories` tool accepts the same `recency_weight` argument.

### List memories — `GET /api/v1/memories`

Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50).
Expand Down
38 changes: 38 additions & 0 deletions docs/prompts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Companion prompts (skill packs)

These are **copy-paste prompt packs** that make the memory server more useful for
recurring kinds of work. They aren't code and they don't change the server — they
ride on top of the six MCP tools (`search_memories`, `add_memory`,
`list_memories`, `get_memory`, `update_memory`, `delete_memory`) that any
connected client exposes.

The idea is adapted from the [OB1 / Open Brain](https://github.com/NateBJones-Projects/OB1)
project's "skill packs," reworked for this server's single-user model and tool names.

For the baseline "always recall first, save durable facts, don't duplicate, don't
store secrets" instruction block, see
[Prompting agents to use memory](../USER_GUIDE.md#prompting-agents-to-use-memory)
in the User Guide. The packs here are the next layer up: structured workflows for
specific tasks.

## How to use a pack

1. Open the pack and copy its prompt block.
2. Paste it into your client where instructions live — a Claude Project, a
`CLAUDE.md`, ChatGPT custom instructions, an `AGENTS.md`, or just inline at the
start of a chat.
3. Adjust the tool names to match how your client surfaces them (Claude Code, for
example, namespaces them like `mcp__mem0-remote__search_memories`).

All packs assume a single shared memory store: searches and lists span everything,
and `agent_id` is only a write-time provenance tag (it never partitions reads).

## Available packs

| Pack | Use it when you want to… |
|---|---|
| [Auto-capture](./auto-capture.md) | Have the agent save a structured summary of a work session at the end, so the next session starts with context. |
| [Research synthesis](./research-synthesis.md) | Turn sources or notes into findings with confidence levels and open questions, persisted to memory. |
| [Meeting synthesis](./meeting-synthesis.md) | Turn meeting notes or a transcript into decisions, action items, and risks, persisted to memory. |

Contributions of new packs are welcome — copy the shape of an existing file.
41 changes: 41 additions & 0 deletions docs/prompts/auto-capture.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Auto-capture: save a session summary

**Goal:** when a working session wraps up, have the agent distill what happened
into a few durable memories so the *next* session can recall it. This closes the
loop that makes a personal memory store actually compound over time.

Adapted from the OB1 "Auto-Capture" skill for this server's six tools.

## Prompt block

Paste into a `CLAUDE.md`, a Project's custom instructions, an `AGENTS.md`, or
inline at the start of a session.

```markdown
## Session auto-capture

When a working session is wrapping up — I say we're done, the task is finished,
or the conversation is clearly ending — capture what's worth remembering:

1. First call `search_memories` for the session's main topic to see what's
already stored, so you update instead of duplicating.
2. Then save the durable takeaways with `add_memory`, one clear fact per call:
- decisions made and the reasoning behind them
- conventions, preferences, or constraints I stated
- unfinished work and the agreed next step
- useful facts discovered (paths, commands, config, names) likely to recur
Tag each with `agent_id` = "auto-capture" for provenance.
3. If a saved memory is now wrong, `update_memory` it rather than adding a new one.
4. Skip transient chatter, one-off details, and anything sensitive
(passwords, API keys, private personal data).
5. Briefly list what you saved so I can correct it.
```

## Notes

- Keep each memory a single, self-contained statement — "We deploy to CapRover
on push to `main`" beats a paragraph. Short facts retrieve and update cleanly.
- The `agent_id` tag (`auto-capture`) is write-only provenance; it does **not**
scope future searches, so these memories surface for every client.
- Pair this with the baseline "recall first" instruction so the next session
opens by reading what the previous one saved.
40 changes: 40 additions & 0 deletions docs/prompts/meeting-synthesis.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Meeting synthesis

**Goal:** turn raw meeting notes or a transcript into the things that actually
matter afterward — decisions, action items, and risks — and persist the durable
ones to memory so they resurface in later sessions.

Adapted from the OB1 "Meeting Synthesis" skill for this server's six tools.

## Prompt block

```markdown
## Meeting synthesis

When I paste meeting notes or a transcript:

1. Recall first: `search_memories` for the project or people involved to load
prior context and avoid contradicting earlier decisions.
2. Produce four sections:
- **Decisions** — what was decided, each as a one-line statement.
- **Action items** — task, owner, and due date if stated.
- **Risks / open issues** — anything flagged as a concern or unresolved.
- **Deliverables** — concrete outputs expected downstream.
3. Persist the durable items with `add_memory`, one per call — every decision,
and any action item or risk that outlives the meeting. Tag
`agent_id` = "meeting" and put a date in `metadata`, e.g.
`{"date": "2026-06-04"}`.
4. If a decision supersedes one already in memory, `update_memory` the old one
instead of adding a conflicting record.
5. Skip small talk and scheduling noise. Never store sensitive personal data.
6. Output the four sections and note which items you saved.
```

## Notes

- Decisions are the highest-value thing to persist; action items often live in a
tracker already, so save the ones you'll want recalled in conversation, not the
whole list.
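- Tagging with a `date` in metadata keeps the meeting date attached to each
  record for your own reference. It pairs well with the
  [recency-boosted search](../USER_GUIDE.md#search-memories--post-apiv1memoriessearch)
  option — raise `recency_weight` when you ask "what did we most recently decide?"
  Note that the boost ranks by each memory's `updated_at` / `created_at`
  timestamp, not by the `date` value you store in `metadata`.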
42 changes: 42 additions & 0 deletions docs/prompts/research-synthesis.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Research synthesis

**Goal:** turn raw sources — articles, docs, notes, search results — into
structured findings with explicit confidence levels and open questions, and
persist the durable conclusions to memory so later work can build on them.

Adapted from the OB1 "Research Synthesis" skill for this server's six tools.

## Prompt block

```markdown
## Research synthesis

When I give you sources to synthesize (links, pasted text, notes, or a topic to
research):

1. Recall first: `search_memories` for the topic to surface anything I've already
concluded, so you extend rather than repeat prior work.
2. Read the sources and produce:
- **Findings** — the key claims, each as a one-line statement.
- **Confidence** — mark each finding high / medium / low based on source
quality and agreement.
- **Contradictions** — where sources disagree, say so explicitly.
- **Open questions** — what's still unresolved and worth following up.
3. Persist the durable conclusions with `add_memory`, one finding per call.
Put the confidence and a short source reference in `metadata`, e.g.
`{"confidence": "high", "source": "<url or title>"}`, and tag
`agent_id` = "research".
4. Do not save low-confidence guesses as if they were facts; either omit them or
store them clearly labeled `"confidence": "low"`.
5. Never store anything sensitive.
6. End with the findings list and note which ones you saved.
```

## Notes

- The `metadata` fields (`confidence`, `source`) are free-form today and are
stored on the memory. A future server change (see the provenance/review issue in
the backlog) may make them first-class and filterable; saving them now means the
data is already there when that lands.
- Keep findings atomic — one claim per memory — so confidence and sources stay
attached to the right statement and updates are surgical.
16 changes: 16 additions & 0 deletions tests/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,22 @@ async def test_read_tools_do_not_expose_agent_id(mcp):
assert "agent_id" not in props, name


async def test_search_exposes_recency_weight(mcp):
    """``search_memories`` must advertise ``recency_weight`` in its input schema."""
    async with Client(mcp) as client:
        by_name = {tool.name: tool for tool in await client.list_tools()}
        schema = by_name["search_memories"].inputSchema or {}
        assert "recency_weight" in schema.get("properties", {})


async def test_search_with_recency_weight_invokes_mem(mcp, mem):
    """Passing ``recency_weight`` still runs the search with the default scope and limit."""
    mem.search.return_value = {"results": []}
    arguments = {"query": "x", "recency_weight": 0.5}
    async with Client(mcp) as client:
        await client.call_tool("search_memories", arguments)
    call_kwargs = mem.search.call_args.kwargs
    assert call_kwargs["filters"] == {"user_id": "default-user"}
    assert call_kwargs["top_k"] == 10


async def test_list_memories_tool(mcp, mem):
mem.get_all.return_value = {"results": []}
async with Client(mcp) as client:
Expand Down
Loading
Loading