# Tuning Search and Retrieval

Deep-dive into `RetrievalConfig` — every option demonstrated with real data.

**Requirements**: `OPENAI_API_KEY` in `.env`.

## Setup + Seed Data

Import 6 conversations covering different topics with overlapping file access.

In [None]:
import shutil
import tempfile
from pathlib import Path

from dotenv import load_dotenv

from tracemem_core import (
    DefaultResourceExtractor,
    Message,
    RetrievalConfig,
    ToolCall,
    TraceMem,
    TraceMemConfig,
)

# Load variables from a local .env file; this tutorial expects OPENAI_API_KEY there.
load_dotenv()

# Scratch directory that acts as the tutorial's project root (removed in the Cleanup cell).
_tmpdir = tempfile.mkdtemp(prefix="tracemem_tutorial_")


def project_path(rel: str) -> str:
    """Resolve ``rel`` against the temporary project directory.

    Args:
        rel: Path relative to the temp project root, e.g. ``"src/auth.py"``.

    Returns:
        The joined path as a plain string.
    """
    project_root = Path(_tmpdir)
    return str(project_root.joinpath(rel))


config = TraceMemConfig(home=Path(_tmpdir) / ".tracemem")
tm = TraceMem(
    config=config,
    resource_extractor=DefaultResourceExtractor(mode="local", home=config.home),
)
await tm.__aenter__()
print("TraceMem ready.")

In [4]:
# Seed data: 6 conversations across different topics with overlapping files.
# Maps conversation id -> ordered list of Message objects. Every tool call
# carries an id that the following role="tool" message echoes in tool_call_id.
# src/auth.py is touched by conv-auth, conv-api, conv-testing and conv-debug.
conversations = {
    # Reads, then edits, src/auth.py.
    "conv-auth": [
        Message(role="user", content="Add JWT authentication middleware to our FastAPI application."),
        Message(role="assistant", content="I'll implement JWT auth.", tool_calls=[
            ToolCall(id="t1", name="read_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="# empty auth module\n", tool_call_id="t1"),
        Message(role="assistant", content="I've added JWT token verification with HS256, extracting sub and exp claims. The get_current_user dependency can be injected into any route.", tool_calls=[
            ToolCall(id="t2", name="edit_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="import jwt\nfrom fastapi import Depends\n\nasync def get_current_user(token):\n    payload = jwt.decode(token, SECRET)\n    return payload\n", tool_call_id="t2"),
        Message(role="assistant", content="JWT authentication is now set up in src/auth.py."),
    ],
    # Edits src/database.py only.
    "conv-database": [
        Message(role="user", content="Set up async PostgreSQL connection pooling with SQLAlchemy."),
        Message(role="assistant", content="Setting up async database.", tool_calls=[
            ToolCall(id="t3", name="edit_file", args={"file_path": project_path("src/database.py")}),
        ]),
        Message(role="tool", content="from sqlalchemy.ext.asyncio import create_async_engine\nengine = create_async_engine(DB_URL, pool_size=20)\n", tool_call_id="t3"),
        Message(role="assistant", content="Database module created with asyncpg driver and pool_size=20."),
    ],
    # Edits src/routes/users.py, then reads src/auth.py.
    "conv-api": [
        Message(role="user", content="Design REST API endpoints for user management with CRUD operations."),
        Message(role="assistant", content="I'll create the user API routes.", tool_calls=[
            ToolCall(id="t4", name="edit_file", args={"file_path": project_path("src/routes/users.py")}),
        ]),
        Message(role="tool", content="from fastapi import APIRouter\nrouter = APIRouter(prefix='/users')\n\n@router.get('/')\nasync def list_users(): ...\n", tool_call_id="t4"),
        Message(role="assistant", content="Created user CRUD endpoints: GET /users, POST /users, GET /users/{id}, PUT /users/{id}, DELETE /users/{id}.", tool_calls=[
            ToolCall(id="t5", name="read_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="import jwt\nfrom fastapi import Depends\n# JWT auth module\n", tool_call_id="t5"),
        Message(role="assistant", content="Integrated auth dependency into protected user endpoints."),
    ],
    # Reads src/auth.py, then edits tests/test_auth.py.
    "conv-testing": [
        Message(role="user", content="Write pytest tests for the authentication module with mocked JWT."),
        Message(role="assistant", content="I'll read the auth module and write tests.", tool_calls=[
            ToolCall(id="t6", name="read_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="import jwt\n# JWT auth implementation\n", tool_call_id="t6"),
        Message(role="assistant", content="Created tests covering valid tokens, expired tokens, and malformed tokens.", tool_calls=[
            ToolCall(id="t7", name="edit_file", args={"file_path": project_path("tests/test_auth.py")}),
        ]),
        Message(role="tool", content="import pytest\nfrom src.auth import get_current_user\n\ndef test_valid_token(): ...\ndef test_expired_token(): ...\n", tool_call_id="t7"),
        Message(role="assistant", content="Tests are ready. Run with `pytest tests/test_auth.py -v`."),
    ],
    # Edits Dockerfile and docker-compose.yml; shares no files with the other conversations.
    "conv-deploy": [
        Message(role="user", content="Create a Dockerfile and docker-compose.yml for production deployment."),
        Message(role="assistant", content="I'll create the deployment files.", tool_calls=[
            ToolCall(id="t8", name="edit_file", args={"file_path": project_path("Dockerfile")}),
        ]),
        Message(role="tool", content="FROM python:3.12-slim\nWORKDIR /app\nCOPY . .\nRUN pip install -r requirements.txt\nCMD [\"uvicorn\", \"main:app\"]\n", tool_call_id="t8"),
        Message(role="assistant", content="Dockerfile uses python:3.12-slim with uvicorn.", tool_calls=[
            ToolCall(id="t9", name="edit_file", args={"file_path": project_path("docker-compose.yml")}),
        ]),
        Message(role="tool", content="version: '3.8'\nservices:\n  app:\n    build: .\n    ports: ['8000:8000']\n  db:\n    image: postgres:16\n", tool_call_id="t9"),
        Message(role="assistant", content="Docker Compose set up with app service and PostgreSQL 16."),
    ],
    # Reads, then edits, src/auth.py.
    "conv-debug": [
        Message(role="user", content="Debug the authentication error: users get 401 even with valid tokens."),
        Message(role="assistant", content="Let me check the auth module for issues.", tool_calls=[
            ToolCall(id="t10", name="read_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="import jwt\nfrom fastapi import Depends\n\nasync def get_current_user(token):\n    payload = jwt.decode(token, SECRET, algorithms=['HS256'])\n    return payload\n", tool_call_id="t10"),
        Message(role="assistant", content="Found the issue: the token extraction isn't using HTTPBearer properly. The `token` parameter receives the full Authorization header, not just the JWT. I'll fix it.", tool_calls=[
            ToolCall(id="t11", name="edit_file", args={"file_path": project_path("src/auth.py")}),
        ]),
        Message(role="tool", content="import jwt\nfrom fastapi import Depends\nfrom fastapi.security import HTTPBearer, HTTPAuthorizationCredentials\n\nsecurity = HTTPBearer()\n\nasync def get_current_user(creds: HTTPAuthorizationCredentials = Depends(security)):\n    payload = jwt.decode(creds.credentials, SECRET, algorithms=['HS256'])\n    return payload\n", tool_call_id="t11"),
        Message(role="assistant", content="Fixed. The auth middleware now correctly uses HTTPBearer to extract the token from the Authorization header. The 401 errors should be resolved."),
    ],
}

for conv_id, messages in conversations.items():
    await tm.import_trace(conv_id, messages)
    print(f"Imported {conv_id}")

print(f"\nSeeded {len(conversations)} conversations.")

Imported conv-auth
Imported conv-database
Imported conv-api
Imported conv-testing
Imported conv-deploy
Imported conv-debug

Seeded 6 conversations.


## Default vs. Tuned Search

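By default, `search()` returns up to 10 results (the seeded data yields only 6 below). Pass `RetrievalConfig(limit=3)` to keep just the top 3 for a more focused answer.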

In [5]:
query = "authentication setup"

default_results = await tm.search(query)
tuned_results = await tm.search(query, config=RetrievalConfig(limit=3))

print(f"Default search: {len(default_results)} results")
print(f"Tuned search (limit=3): {len(tuned_results)} results")
print()
for r in tuned_results:
    print(r)

Default search: 6 results
Tuned search (limit=3): 3 results

Result(12cccdd4, score=0.033, conv=conv-debug, ts=2026-02-08 17:25, text='Debug the authentication error: users get 401 even with vali...', context=yes)
Result(07eda3a2, score=0.032, conv=conv-testing, ts=2026-02-08 17:25, text='Write pytest tests for the authentication module with mocked...', context=yes)
Result(c86ae3de, score=0.031, conv=conv-auth, ts=2026-02-08 17:25, text='Add JWT authentication middleware to our FastAPI application...', context=yes)


## Lightweight Mode: `include_context=False`

Skip graph traversal for faster search — only vector + text scores, no agent responses or tool uses.

In [6]:
full = await tm.search(query, config=RetrievalConfig(limit=2, include_context=True))
light = await tm.search(query, config=RetrievalConfig(limit=2, include_context=False))

print("=== With context ===")
for r in full:
    print(f"  text: {r.text[:80]}")
    print(f"  context: {r.context}")
    print()

print("=== Without context (lightweight) ===")
for r in light:
    print(f"  text: {r.text[:80]}")
    print(f"  context: {r.context}")
    print()

=== With context ===
  text: Debug the authentication error: users get 401 even with valid tokens.
  context: Context(user[12cccdd4]='Debug the authentication error: users get 401 even with valid tokens.', agent[f0a6d2e9]='Let me check the auth module for issues.', tools=[READ_FILE(file://src/auth.py rv=8e6ae3ed res=a72d059d)])

  text: Write pytest tests for the authentication module with mocked JWT.
  context: Context(user[07eda3a2]='Write pytest tests for the authentication module with mocked JWT.', agent[25736a61]="I'll read the auth module and write tests.", tools=[READ_FILE(file://src/auth.py rv=8c3f618f res=a72d059d)])

=== Without context (lightweight) ===
  text: Debug the authentication error: users get 401 even with valid tokens.
  context: None

  text: Add JWT authentication middleware to our FastAPI application.
  context: None



## Vector Weight Tuning

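`vector_weight` controls the balance between semantic similarity (vector search) and keyword matching (full-text search):
- `0.0` = pure keyword/FTS
- `0.5` = balanced (default)
- `1.0` = pure semantic

Note: in the run captured below, all three settings return the same ranking and scores on this small dataset, so read the output as a usage example rather than a demonstration of differing rankings.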

In [7]:
query = "database connection"

for weight in [0.0, 0.5, 1.0]:
    results = await tm.search(
        query,
        config=RetrievalConfig(limit=3, vector_weight=weight, include_context=False),
    )
    print(f"\n--- vector_weight={weight} ---")
    for r in results:
        print(f"  score={r.score:.3f}  conv={r.conversation_id:<14}  {r.text[:70]}")


--- vector_weight=0.0 ---
  score=0.033  conv=conv-database   Set up async PostgreSQL connection pooling with SQLAlchemy.
  score=0.016  conv=conv-debug      Debug the authentication error: users get 401 even with valid tokens.
  score=0.016  conv=conv-api        Design REST API endpoints for user management with CRUD operations.

--- vector_weight=0.5 ---
  score=0.033  conv=conv-database   Set up async PostgreSQL connection pooling with SQLAlchemy.
  score=0.016  conv=conv-debug      Debug the authentication error: users get 401 even with valid tokens.
  score=0.016  conv=conv-api        Design REST API endpoints for user management with CRUD operations.

--- vector_weight=1.0 ---
  score=0.033  conv=conv-database   Set up async PostgreSQL connection pooling with SQLAlchemy.
  score=0.016  conv=conv-debug      Debug the authentication error: users get 401 even with valid tokens.
  score=0.016  conv=conv-api        Design REST API endpoints for user management with CRUD operations.


## Exclude Conversation

The "don't show me what I just said" pattern — essential for Claude Code hooks where
you want to find *other* conversations about the same topic.

In [8]:
query = "JWT authentication"

all_results = await tm.search(query, config=RetrievalConfig(limit=5, include_context=False))
filtered = await tm.search(
    query,
    config=RetrievalConfig(limit=5, include_context=False, exclude_conversation_id="conv-auth"),
)

print("=== All results ===")
for r in all_results:
    print(f"  conv={r.conversation_id:<14}  {r.text[:60]}")

print("\n=== Excluding conv-auth ===")
for r in filtered:
    print(f"  conv={r.conversation_id:<14}  {r.text[:60]}")

=== All results ===
  conv=conv-auth       Add JWT authentication middleware to our FastAPI application
  conv=conv-testing    Write pytest tests for the authentication module with mocked
  conv=conv-debug      Debug the authentication error: users get 401 even with vali
  conv=conv-api        Design REST API endpoints for user management with CRUD oper
  conv=conv-database   Set up async PostgreSQL connection pooling with SQLAlchemy.

=== Excluding conv-auth ===
  conv=conv-debug      Debug the authentication error: users get 401 even with vali
  conv=conv-testing    Write pytest tests for the authentication module with mocked
  conv=conv-api        Design REST API endpoints for user management with CRUD oper
  conv=conv-database   Set up async PostgreSQL connection pooling with SQLAlchemy.
  conv=conv-deploy     Create a Dockerfile and docker-compose.yml for production de


## Unique Conversations

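`unique_conversations=True` deduplicates results — at most one hit per conversation. In this dataset each conversation already contributes a single hit, so both calls below return the same six results.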

In [9]:
query = "auth"

normal = await tm.search(query, config=RetrievalConfig(limit=10, include_context=False))
unique = await tm.search(
    query,
    config=RetrievalConfig(limit=10, include_context=False, unique_conversations=True),
)

print(f"Normal: {len(normal)} results from conversations: {[r.conversation_id for r in normal]}")
print(f"Unique: {len(unique)} results from conversations: {[r.conversation_id for r in unique]}")

Normal: 6 results from conversations: ['conv-debug', 'conv-testing', 'conv-auth', 'conv-api', 'conv-database', 'conv-deploy']
Unique: 6 results from conversations: ['conv-debug', 'conv-testing', 'conv-auth', 'conv-api', 'conv-database', 'conv-deploy']


## Resource Query Sorting

`get_conversations_for_resource()` supports sorting by `created_at` or `last_accessed_at`, ascending or descending.

In [10]:
uri = "file://src/auth.py"

newest_first = await tm.get_conversations_for_resource(
    uri, config=RetrievalConfig(sort_by="created_at", sort_order="desc")
)
oldest_first = await tm.get_conversations_for_resource(
    uri, config=RetrievalConfig(sort_by="created_at", sort_order="asc")
)

print("=== Newest first ===")
for ref in newest_first:
    print(f"  {ref.conversation_id}: {ref.user_text[:60]}")

print("\n=== Oldest first ===")
for ref in oldest_first:
    print(f"  {ref.conversation_id}: {ref.user_text[:60]}")

=== Newest first ===
  conv-debug: Debug the authentication error: users get 401 even with vali
  conv-debug: Debug the authentication error: users get 401 even with vali
  conv-testing: Write pytest tests for the authentication module with mocked
  conv-api: Design REST API endpoints for user management with CRUD oper
  conv-auth: Add JWT authentication middleware to our FastAPI application
  conv-auth: Add JWT authentication middleware to our FastAPI application

=== Oldest first ===
  conv-auth: Add JWT authentication middleware to our FastAPI application
  conv-auth: Add JWT authentication middleware to our FastAPI application
  conv-api: Design REST API endpoints for user management with CRUD oper
  conv-testing: Write pytest tests for the authentication module with mocked
  conv-debug: Debug the authentication error: users get 401 even with vali
  conv-debug: Debug the authentication error: users get 401 even with vali


## Reranker Comparison: RRF vs. Linear

Two TraceMem instances sharing the same storage, but using different rerankers.
Same query, different rankings.

In [11]:
# Create a second TraceMem with linear reranker, pointing to the same storage
tm_linear = TraceMem(config=config, reranker="linear")
await tm_linear.__aenter__()

query = "authentication error debugging"

rrf_results = await tm.search(query, config=RetrievalConfig(limit=5, include_context=False))
linear_results = await tm_linear.search(query, config=RetrievalConfig(limit=5, include_context=False))

print(f"{'Rank':<6} {'RRF score':<12} {'RRF conv':<16} {'Linear score':<14} {'Linear conv'}")
print("-" * 70)
for i in range(max(len(rrf_results), len(linear_results))):
    rrf = rrf_results[i] if i < len(rrf_results) else None
    lin = linear_results[i] if i < len(linear_results) else None
    rrf_score = f"{rrf.score:<12.3f}" if rrf else f"{'N/A':<12}"
    rrf_conv = f"{rrf.conversation_id:<16}" if rrf else f"{'N/A':<16}"
    lin_score = f"{lin.score:<14.3f}" if lin else f"{'N/A':<14}"
    lin_conv = lin.conversation_id if lin else "N/A"
    print(f"{i+1:<6} {rrf_score} {rrf_conv} {lin_score} {lin_conv}")

await tm_linear.__aexit__(None, None, None)

Rank   RRF score    RRF conv         Linear score   Linear conv
----------------------------------------------------------------------
1      0.033        conv-debug       0.903          conv-auth
2      0.032        conv-testing     0.823          conv-testing
3      0.031        conv-auth        0.500          conv-database
4      0.016        conv-api         0.498          conv-deploy
5      0.015        conv-deploy      0.386          conv-api


## Trajectory Depth

Control how many MESSAGE hops to follow when building a trajectory.
`trajectory_max_depth=2` follows at most two hops from the starting node, so the trajectory below is truncated to three steps (the start node plus two hops).

In [12]:
# Find a result from the debug conversation (which has multiple turns)
debug_results = await tm.search(
    "401 error",
    config=RetrievalConfig(limit=1, include_context=False),
)

if debug_results:
    node_id = debug_results[0].node_id

    full_traj = await tm.get_trajectory(node_id)
    short_traj = await tm.get_trajectory(
        node_id, config=RetrievalConfig(trajectory_max_depth=2)
    )

    print(f"Full trajectory: {len(full_traj.steps)} steps")
    print(full_traj)
    print(f"\nShort trajectory (max_depth=2): {len(short_traj.steps)} steps")
    print(short_traj)
else:
    print("No results found for the debug query.")

Full trajectory: 4 steps
Trajectory(4 steps):
  Step(12cccdd4 2026-02-08 17:25 UserText: 'Debug the authentication error: users get 401 even with vali...')
  Step(f0a6d2e9 2026-02-08 17:25 AgentText: 'Let me check the auth module for issues.' tools=[read_file])
  Step(dcf687f0 2026-02-08 17:25 AgentText: "Found the issue: the token extraction isn't using HTTPBearer..." tools=[edit_file])
  Step(cc320c89 2026-02-08 17:25 AgentText: 'Fixed. The auth middleware now correctly uses HTTPBearer to ...')

Short trajectory (max_depth=2): 3 steps
Trajectory(3 steps):
  Step(12cccdd4 2026-02-08 17:25 UserText: 'Debug the authentication error: users get 401 even with vali...')
  Step(f0a6d2e9 2026-02-08 17:25 AgentText: 'Let me check the auth module for issues.' tools=[read_file])
  Step(dcf687f0 2026-02-08 17:25 AgentText: "Found the issue: the token extraction isn't using HTTPBearer..." tools=[edit_file])


## Cleanup

In [13]:
await tm.__aexit__(None, None, None)
shutil.rmtree(_tmpdir, ignore_errors=True)
print("Cleaned up.")

Cleaned up.
