Skip to content

Commit e48cd64

Browse files
committed
feat(memory): scoringWeights override on CognitiveRetrievalOptions
New optional field CognitiveRetrievalOptions.scoringWeights accepts Partial<ScoringWeights> to override the 6-signal retrieval weights for a single call. Missing keys fall back to DEFAULT_SCORING_WEIGHTS. Also exposes the SignalName type (keyof ScoringWeights) for typesafe ablation APIs. Enables agentos-bench signal-ablation studies: zero one weight at a time on a stratified sample, measure Δaccuracy. Implementation: MemoryStore.query merges options.scoringWeights over DEFAULT_SCORING_WEIGHTS and threads the result into ScoringContext.weights. No behaviour change when scoringWeights is omitted — defaults pass through unchanged.
1 parent 9433dad commit e48cd64

5 files changed

Lines changed: 186 additions & 1 deletion

File tree

src/memory/core/decay/RetrievalPriorityScorer.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ export interface ScoringWeights {
3030
importance: number;
3131
}
3232

33+
/**
 * Identifier of one retrieval signal: the union of the property names
 * of {@link ScoringWeights}.
 *
 * Intended for ablation tooling, where a single weight is set to zero
 * at a time and the resulting change in accuracy is measured.
 */
export type SignalName = keyof ScoringWeights;
38+
3339
export const DEFAULT_SCORING_WEIGHTS: ScoringWeights = {
3440
strength: 0.25,
3541
similarity: 0.35,

src/memory/core/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,14 @@ export interface CognitiveRetrievalOptions {
213213
hyde?: boolean;
214214
/** Shared retrieval profile and confidence policy. */
215215
policy?: MemoryRetrievalPolicy;
216+
/**
217+
* Override the 6-signal retrieval weights for this call. Missing
218+
* keys fall back to {@link DEFAULT_SCORING_WEIGHTS}. Useful for
219+
* ablation studies (zero one weight at a time and measure
220+
* Δaccuracy) and for A/B testing alternate weight configurations
221+
* without mutating global defaults.
222+
*/
223+
scoringWeights?: Partial<import('./decay/RetrievalPriorityScorer.js').ScoringWeights>;
216224
}
217225

218226
export interface ScoredMemoryTrace extends MemoryTrace {

src/memory/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export type {
9696
ScoringWeights,
9797
ScoringContext,
9898
CandidateTrace,
99+
SignalName,
99100
} from './core/decay/RetrievalPriorityScorer.js';
100101

101102
// --- Working Memory ---

src/memory/retrieval/store/MemoryStore.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ import {
3636
import {
3737
scoreAndRankTraces,
3838
detectPartiallyRetrieved,
39+
DEFAULT_SCORING_WEIGHTS,
3940
type CandidateTrace,
4041
type ScoringContext,
42+
type ScoringWeights,
4143
} from '../../core/decay/RetrievalPriorityScorer.js';
4244

4345
// ---------------------------------------------------------------------------
@@ -366,12 +368,17 @@ export class MemoryStore {
366368
}
367369
}
368370

369-
// Score and rank
371+
// Score and rank — optional per-call scoringWeights override
372+
// enables ablation studies (zero one signal at a time).
373+
const effectiveWeights: ScoringWeights | undefined = options.scoringWeights
374+
? { ...DEFAULT_SCORING_WEIGHTS, ...options.scoringWeights }
375+
: undefined;
370376
const scoringContext: ScoringContext = {
371377
currentMood,
372378
now,
373379
neutralMood: options.neutralMood,
374380
decayConfig: this.decay,
381+
weights: effectiveWeights,
375382
};
376383

377384
const scored = scoreAndRankTraces(allCandidates, scoringContext).slice(0, topK);
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* @fileoverview Pins the per-request scoringWeights override on
3+
* CognitiveRetrievalOptions. Benchmarks need to zero one signal at
4+
* a time to measure Δaccuracy (signal ablation at real scale); this
5+
* surface is the public API for that.
6+
*
7+
* Uses the same mock harness as the integration spec to avoid SQLite
8+
* / API dependencies.
9+
*/
10+
11+
import { describe, it, expect, vi, beforeEach } from 'vitest';
12+
import { CognitiveMemoryManager } from '../../src/memory/CognitiveMemoryManager';
13+
import type { CognitiveMemoryConfig, PADState } from '../../src/memory/core/config';
14+
import type { IVectorStore, VectorDocument, QueryResult } from '../../src/rag/IVectorStore';
15+
import type { IEmbeddingManager } from '../../src/rag/IEmbeddingManager';
16+
import type { IKnowledgeGraph } from '../../src/core/knowledge/IKnowledgeGraph';
17+
import type { IWorkingMemory } from '../../src/cognitive_substrate/memory/IWorkingMemory';
18+
import type { SignalName } from '../../src/memory/core/decay/RetrievalPriorityScorer';
19+
20+
/**
 * Builds an in-memory stand-in for {@link IVectorStore}.
 *
 * Documents are held per collection name in a local `Map`. `upsert`
 * replaces a stored document that has the same `id` or appends a new
 * one; `query` returns every document of the named collection with
 * `similarityScore` fixed at 0.85. The remaining methods are stubs that
 * resolve to canned values.
 */
function createMockVectorStore(): IVectorStore {
  const docsByCollection = new Map<string, VectorDocument[]>();
  const resolvesVoid = () => vi.fn().mockResolvedValue(undefined);

  const mock = {
    initialize: resolvesVoid(),
    createCollection: resolvesVoid(),
    deleteCollection: resolvesVoid(),
    collectionExists: vi.fn(async (name: string) => docsByCollection.has(name)),
    upsert: vi.fn(async (name: string, incoming: VectorDocument[]) => {
      const bucket = docsByCollection.get(name) ?? [];
      incoming.forEach((doc) => {
        const slot = bucket.findIndex((stored) => stored.id === doc.id);
        if (slot < 0) {
          bucket.push(doc);
        } else {
          bucket[slot] = doc;
        }
      });
      docsByCollection.set(name, bucket);
      return { succeeded: incoming.length, failed: 0 };
    }),
    query: vi.fn(async (name: string): Promise<QueryResult> => {
      const stored = docsByCollection.get(name) ?? [];
      // Every stored document matches, always with the same fixed score.
      return { documents: stored.map((doc) => ({ ...doc, similarityScore: 0.85 })) };
    }),
    deleteByIds: resolvesVoid(),
    getStats: vi.fn().mockResolvedValue({ documentCount: 0, vectorCount: 0 }),
    shutdown: resolvesVoid(),
  };

  return mock as unknown as IVectorStore;
}
45+
46+
/**
 * Builds a stub {@link IEmbeddingManager}.
 *
 * `generateEmbeddings` always resolves to one fixed 4-dimensional vector
 * and `getDimension` returns 4, so no embedding backend is needed.
 */
function createMockEmbeddingManager(): IEmbeddingManager {
  const generateEmbeddings = vi.fn(async () => ({
    embeddings: [[0.1, 0.2, 0.3, 0.4]],
    model: 'mock',
    tokensUsed: 10,
  }));
  const getDimension = vi.fn().mockReturnValue(4);

  const mock = { generateEmbeddings, getDimension };
  return mock as unknown as IEmbeddingManager;
}
52+
53+
/**
 * Builds a stub {@link IKnowledgeGraph} whose methods all resolve to
 * fixed placeholder values (empty lists, `undefined`, `null`, zero
 * counts, or minimal `{ id }` records). Nothing is stored between calls.
 */
function createMockKnowledgeGraph(): IKnowledgeGraph {
  // Shorthand for "a mock that resolves to this canned value".
  const resolves = (value: unknown) => vi.fn().mockResolvedValue(value);

  const mock = {
    initialize: resolves(undefined),
    recordMemory: resolves({
      id: 'mem-1',
      createdAt: new Date().toISOString(),
      accessCount: 0,
      lastAccessedAt: new Date().toISOString(),
    }),
    upsertEntity: resolves({ id: 'e-1' }),
    getEntity: resolves(undefined),
    queryEntities: resolves([]),
    deleteEntity: resolves(true),
    upsertRelation: resolves({ id: 'r-1' }),
    getRelations: resolves([]),
    deleteRelation: resolves(true),
    getMemory: resolves(undefined),
    queryMemories: resolves([]),
    recallMemories: resolves([]),
    traverse: resolves({ root: {}, levels: [], totalEntities: 0, totalRelations: 0 }),
    findPath: resolves(null),
    getNeighborhood: resolves({ entities: [], relations: [] }),
    semanticSearch: resolves([]),
    extractFromText: resolves({ entities: [], relations: [] }),
    mergeEntities: resolves({}),
    decayMemories: resolves(0),
    getStats: resolves({ totalEntities: 0, totalRelations: 0, totalMemories: 0 }),
    clear: resolves(undefined),
  };

  return mock as unknown as IKnowledgeGraph;
}
78+
79+
/**
 * Builds an {@link IWorkingMemory} test double backed by a local `Map`.
 *
 * `set`, `get`, `delete`, `has`, `clear`, `size` and `getAll` operate on
 * that map; `initialize` and `close` are stubs that resolve to
 * `undefined`.
 */
function createMockWorkingMemory(): IWorkingMemory {
  const entries = new Map<string, unknown>();
  const resolvesVoid = () => vi.fn().mockResolvedValue(undefined);

  const mock = {
    id: 'mock-wm',
    initialize: resolvesVoid(),
    set: vi.fn(async (key: string, value: unknown) => {
      entries.set(key, value);
    }),
    get: vi.fn(async (key: string) => entries.get(key)),
    delete: vi.fn(async (key: string) => {
      entries.delete(key);
    }),
    getAll: vi.fn(async () => Object.fromEntries(entries)),
    clear: vi.fn(async () => {
      entries.clear();
    }),
    size: vi.fn(async () => entries.size),
    has: vi.fn(async (key: string) => entries.has(key)),
    close: resolvesVoid(),
  };

  return mock as unknown as IWorkingMemory;
}
94+
95+
/**
 * Pins the per-call `scoringWeights` override accepted by
 * `CognitiveMemoryManager.retrieve()`.
 *
 * Each case runs against a freshly initialised manager wired to new
 * mocks (see `beforeEach`) and encodes exactly one trace, so
 * `retrieved[0]` is expected to be that trace.
 */
describe('CognitiveMemoryManager retrieve with scoringWeights override', () => {
  let manager: CognitiveMemoryManager;
  const neutralMood: PADState = { valence: 0, arousal: 0, dominance: 0 };

  beforeEach(async () => {
    manager = new CognitiveMemoryManager();
    await manager.initialize({
      vectorStore: createMockVectorStore(),
      embeddingManager: createMockEmbeddingManager(),
      knowledgeGraph: createMockKnowledgeGraph(),
      workingMemory: createMockWorkingMemory(),
      agentId: 'test-agent',
      traits: { openness: 0.7, conscientiousness: 0.6, emotionality: 0.5 },
      moodProvider: () => neutralMood,
      featureDetectionStrategy: 'keyword',
      collectionPrefix: 'test',
    } as CognitiveMemoryConfig);
  });

  it('accepts scoringWeights on retrieve() and produces a ranked result', async () => {
    await manager.encode('dark mode preference', neutralMood, 'neutral');
    const result = await manager.retrieve('dark mode', neutralMood, {
      scopes: [{ scope: 'user', scopeId: 'test-agent' }],
      scoringWeights: { similarity: 0, strength: 1, recency: 0, emotionalCongruence: 0, graphActivation: 0, importance: 0 },
    });
    expect(result.retrieved.length).toBeGreaterThan(0);
    // When similarity is zeroed and strength is the only signal, the
    // retrieval still succeeds — the scorer still produces a number.
    const score = result.retrieved[0].retrievalScore;
    expect(typeof score).toBe('number');
    // `typeof NaN` is also 'number', so require a finite value as well.
    expect(Number.isFinite(score)).toBe(true);
  });

  it('zeroing a signal changes scoreBreakdown shape vs default', async () => {
    await manager.encode('signal ablation test', neutralMood, 'neutral');
    const defaultResult = await manager.retrieve('signal ablation', neutralMood, {
      scopes: [{ scope: 'user', scopeId: 'test-agent' }],
    });
    const zeroSimilarity = await manager.retrieve('signal ablation', neutralMood, {
      scopes: [{ scope: 'user', scopeId: 'test-agent' }],
      scoringWeights: { similarity: 0 },
    });
    // Both calls must produce results.
    expect(defaultResult.retrieved.length).toBeGreaterThan(0);
    expect(zeroSimilarity.retrieved.length).toBeGreaterThan(0);
    // The override must actually reach the scorer: ablating similarity
    // has to move the score of the (single) retrieved trace.
    // NOTE(review): assumes the mock's fixed 0.85 similarity contributes
    // a non-zero amount to the default score — confirm against the scorer.
    const defaultScore = defaultResult.retrieved[0].retrievalScore;
    const ablatedScore = zeroSimilarity.retrieved[0].retrievalScore;
    expect(Number.isFinite(defaultScore)).toBe(true);
    expect(Number.isFinite(ablatedScore)).toBe(true);
    expect(ablatedScore).not.toBe(defaultScore);
  });

  it('partial weights merge with defaults', async () => {
    await manager.encode('partial weight merge', neutralMood, 'neutral');
    // Supplying only `{ similarity: 0 }` should not require specifying
    // the other five weights; they fall back to defaults.
    const result = await manager.retrieve('partial weight', neutralMood, {
      scopes: [{ scope: 'user', scopeId: 'test-agent' }],
      scoringWeights: { similarity: 0 },
    });
    expect(result.retrieved.length).toBeGreaterThan(0);
    // A weight left undefined by a broken merge would surface as NaN in
    // any arithmetic combination of the signals, so a finite score is
    // the observable evidence that the defaults were filled in.
    expect(Number.isFinite(result.retrieved[0].retrievalScore)).toBe(true);
  });

  it('SignalName type exports the six canonical signal keys', () => {
    // The real check is the `SignalName[]` annotation: a name that is
    // not a key of ScoringWeights fails type-checking. The runtime
    // assertion only pins the expected count.
    const signals: SignalName[] = [
      'strength',
      'similarity',
      'recency',
      'emotionalCongruence',
      'graphActivation',
      'importance',
    ];
    expect(signals).toHaveLength(6);
  });
});

0 commit comments

Comments
 (0)