From 9c788e364e6a4a3999904916a59eddd1830ba937 Mon Sep 17 00:00:00 2001 From: Bao Ha Date: Sun, 12 Apr 2026 16:26:47 +0700 Subject: [PATCH] feat: [ENG-1892] Add QueryExecutorResult type with tier and timing metadata --- .../core/domain/entities/query-log-entry.ts | 19 +- .../interfaces/executor/i-query-executor.ts | 24 +- src/server/infra/daemon/agent-process.ts | 3 +- src/server/infra/executor/query-executor.ts | 156 ++++++-- .../infra/executor/query-executor.test.ts | 337 +++++++++++++++--- 5 files changed, 448 insertions(+), 91 deletions(-) diff --git a/src/server/core/domain/entities/query-log-entry.ts b/src/server/core/domain/entities/query-log-entry.ts index 2b183f19c..26d69dc28 100644 --- a/src/server/core/domain/entities/query-log-entry.ts +++ b/src/server/core/domain/entities/query-log-entry.ts @@ -8,17 +8,24 @@ export type QueryLogTier = (typeof QUERY_LOG_TIERS)[number] export type TierKey = `tier${QueryLogTier}` +/** Named tier constants — single source of truth for tier assignments in QueryExecutor. */ +export const TIER_EXACT_CACHE: QueryLogTier = 0 +export const TIER_FUZZY_CACHE: QueryLogTier = 1 +export const TIER_DIRECT_SEARCH: QueryLogTier = 2 +export const TIER_OPTIMIZED_LLM: QueryLogTier = 3 +export const TIER_FULL_AGENTIC: QueryLogTier = 4 + /** Human-readable labels for each resolution tier. */ export const QUERY_LOG_TIER_LABELS: Record = { - 0: 'exact cache hit', - 1: 'fuzzy cache match', - 2: 'direct search', - 3: 'optimized LLM', - 4: 'full agentic', + [TIER_DIRECT_SEARCH]: 'direct search', + [TIER_EXACT_CACHE]: 'exact cache hit', + [TIER_FULL_AGENTIC]: 'full agentic', + [TIER_FUZZY_CACHE]: 'fuzzy cache match', + [TIER_OPTIMIZED_LLM]: 'optimized LLM', } /** Tiers considered cache hits for cache-hit-rate calculation. */ -export const CACHE_TIERS = [0, 1] as const satisfies readonly QueryLogTier[] +export const CACHE_TIERS = [TIER_EXACT_CACHE, TIER_FUZZY_CACHE] as const satisfies readonly QueryLogTier[] export type ByTier = Record & {unknown: number} diff --git a/src/server/core/interfaces/executor/i-query-executor.ts b/src/server/core/interfaces/executor/i-query-executor.ts index 69b7f3bbb..f37f2930a 100644 --- a/src/server/core/interfaces/executor/i-query-executor.ts +++ b/src/server/core/interfaces/executor/i-query-executor.ts @@ -1,4 +1,5 @@ import type {ICipherAgent} from '../../../../agent/core/interfaces/i-cipher-agent.js' +import type {QueryLogMatchedDoc, QueryLogSearchMetadata, QueryLogTier} from '../../domain/entities/query-log-entry.js' /** * Options for executing query with an injected agent. @@ -11,6 +12,25 @@ export interface QueryExecuteOptions { taskId: string } +/** + * Structured result from QueryExecutor containing the response string + * plus metadata about how the query was resolved. + * + * Consumed by QueryLogHandler (ENG-1893) to persist query log entries. + */ +export type QueryExecutorResult = { + /** Documents matched during search (empty for cache hits) */ + matchedDocs: QueryLogMatchedDoc[] + /** The response string (includes attribution footer) */ + response: string + /** Search statistics (undefined for cache-only tiers 0/1) */ + searchMetadata?: QueryLogSearchMetadata + /** Resolution tier: 0=exact cache, 1=fuzzy cache, 2=direct search, 3=optimized LLM, 4=full agentic */ + tier: QueryLogTier + /** Wall-clock timing from method entry to return */ + timing: {durationMs: number} +} + /** * IQueryExecutor - Executes query tasks with an injected CipherAgent. * @@ -28,7 +48,7 @@ export interface IQueryExecutor { * * @param agent - Long-lived CipherAgent (managed by caller) * @param options - Execution options (query) - * @returns Result string from agent execution + * @returns Structured result with response, tier, timing, and search metadata */ - executeWithAgent(agent: ICipherAgent, options: QueryExecuteOptions): Promise + executeWithAgent(agent: ICipherAgent, options: QueryExecuteOptions): Promise } diff --git a/src/server/infra/daemon/agent-process.ts b/src/server/infra/daemon/agent-process.ts index be49ea7b7..5a8eb74a5 100644 --- a/src/server/infra/daemon/agent-process.ts +++ b/src/server/infra/daemon/agent-process.ts @@ -470,7 +470,8 @@ async function executeTask( } case 'query': { - result = await queryExecutor.executeWithAgent(agent, {query: content, taskId}) + const queryResult = await queryExecutor.executeWithAgent(agent, {query: content, taskId}) + result = queryResult.response break } diff --git a/src/server/infra/executor/query-executor.ts b/src/server/infra/executor/query-executor.ts index 68568dc0a..3168fcc67 100644 --- a/src/server/infra/executor/query-executor.ts +++ b/src/server/infra/executor/query-executor.ts @@ -1,24 +1,41 @@ -import { join } from 'node:path' - -import type { ICipherAgent } from '../../../agent/core/interfaces/i-cipher-agent.js' -import type { IFileSystem } from '../../../agent/core/interfaces/i-file-system.js' -import type { ISearchKnowledgeService, SearchKnowledgeResult } from '../../../agent/infra/sandbox/tools-sdk.js' -import type { IQueryExecutor, QueryExecuteOptions } from '../../core/interfaces/executor/i-query-executor.js' - -import { ABSTRACT_EXTENSION, BRV_DIR, CONTEXT_FILE_EXTENSION, CONTEXT_TREE_DIR } from '../../constants.js' -import { isDerivedArtifact } from '../context-tree/derived-artifact.js' -import { FileContextTreeManifestService } from '../context-tree/file-context-tree-manifest-service.js' +import {join} from 'node:path' + +import type {ICipherAgent} from '../../../agent/core/interfaces/i-cipher-agent.js' +import type {IFileSystem} from '../../../agent/core/interfaces/i-file-system.js' +import type {ISearchKnowledgeService, SearchKnowledgeResult} from '../../../agent/infra/sandbox/tools-sdk.js' +import type {QueryLogMatchedDoc} from '../../core/domain/entities/query-log-entry.js' +import type { + IQueryExecutor, + QueryExecuteOptions, + QueryExecutorResult, +} from '../../core/interfaces/executor/i-query-executor.js' + +import {ABSTRACT_EXTENSION, BRV_DIR, CONTEXT_FILE_EXTENSION, CONTEXT_TREE_DIR} from '../../constants.js' +import { + TIER_DIRECT_SEARCH, + TIER_EXACT_CACHE, + TIER_FULL_AGENTIC, + TIER_FUZZY_CACHE, + TIER_OPTIMIZED_LLM, +} from '../../core/domain/entities/query-log-entry.js' +import {isDerivedArtifact} from '../context-tree/derived-artifact.js' +import {FileContextTreeManifestService} from '../context-tree/file-context-tree-manifest-service.js' import { canRespondDirectly, type DirectSearchResult, formatDirectResponse, formatNotFoundResponse, } from './direct-search-responder.js' -import { QueryResultCache } from './query-result-cache.js' +import {QueryResultCache} from './query-result-cache.js' /** Attribution footer appended to all query responses */ const ATTRIBUTION_FOOTER = '\n\n---\nSource: ByteRover Knowledge Base' +/** Map search results to the matchedDocs shape for QueryExecutorResult. */ +function buildMatchedDocs(sr: SearchKnowledgeResult | undefined): QueryLogMatchedDoc[] { + return (sr?.results ?? []).map((r) => ({path: r.path, score: r.score, title: r.title})) +} + /** Minimum normalized score to consider a result high-confidence for pre-fetching */ const SMART_ROUTING_SCORE_THRESHOLD = 0.7 @@ -63,7 +80,7 @@ export class QueryExecutor implements IQueryExecutor { private static readonly FINGERPRINT_CACHE_TTL_MS = 30_000 private readonly baseDirectory?: string private readonly cache?: QueryResultCache - private cachedFingerprint?: { expiresAt: number; value: string } + private cachedFingerprint?: {expiresAt: number; value: string} private readonly fileSystem?: IFileSystem private readonly searchService?: ISearchKnowledgeService @@ -76,11 +93,12 @@ export class QueryExecutor implements IQueryExecutor { } } - public async executeWithAgent(agent: ICipherAgent, options: QueryExecuteOptions): Promise { - const { query, taskId } = options + public async executeWithAgent(agent: ICipherAgent, options: QueryExecuteOptions): Promise { + const startTime = Date.now() + const {query, taskId} = options // Start search early — runs in parallel with fingerprint computation (independent operations) - const searchPromise = this.searchService?.search(query, { limit: SMART_ROUTING_MAX_DOCS }) + const searchPromise = this.searchService?.search(query, {limit: SMART_ROUTING_MAX_DOCS}) // Prevent unhandled rejection if we return early (cache hit) while search is still pending searchPromise?.catch(() => {}) @@ -90,7 +108,12 @@ export class QueryExecutor implements IQueryExecutor { fingerprint = await this.computeContextTreeFingerprint() const cached = this.cache.get(query, fingerprint) if (cached) { - return cached + ATTRIBUTION_FOOTER + return { + matchedDocs: [], + response: cached + ATTRIBUTION_FOOTER, + tier: TIER_EXACT_CACHE, + timing: {durationMs: Date.now() - startTime}, + } } } @@ -98,7 +121,12 @@ export class QueryExecutor implements IQueryExecutor { if (this.cache && fingerprint) { const fuzzyHit = this.cache.findSimilar(query, fingerprint) if (fuzzyHit) { - return fuzzyHit + ATTRIBUTION_FOOTER + return { + matchedDocs: [], + response: fuzzyHit + ATTRIBUTION_FOOTER, + tier: TIER_FUZZY_CACHE, + timing: {durationMs: Date.now() - startTime}, + } } } @@ -122,7 +150,13 @@ export class QueryExecutor implements IQueryExecutor { this.cache.set(query, response, fingerprint) } - return response + ATTRIBUTION_FOOTER + return { + matchedDocs: [], + response: response + ATTRIBUTION_FOOTER, + searchMetadata: {resultCount: 0, topScore: 0, totalFound: 0}, + tier: TIER_DIRECT_SEARCH, + timing: {durationMs: Date.now() - startTime}, + } } // === Tier 2: Direct search response (~100-200ms) === @@ -133,7 +167,18 @@ export class QueryExecutor implements IQueryExecutor { this.cache.set(query, directResult, fingerprint) } - return directResult + ATTRIBUTION_FOOTER + return { + matchedDocs: buildMatchedDocs(searchResult), + response: directResult + ATTRIBUTION_FOOTER, + searchMetadata: { + cacheFingerprint: fingerprint, + resultCount: searchResult.results.length, + topScore: searchResult.results[0]?.score ?? 0, + totalFound: searchResult.totalFound, + }, + tier: TIER_DIRECT_SEARCH, + timing: {durationMs: Date.now() - startTime}, + } } } @@ -156,9 +201,7 @@ export class QueryExecutor implements IQueryExecutor { if (manifest) { const resolved = await manifestService.resolveForInjection(manifest, query, this.baseDirectory) if (resolved.length > 0) { - manifestContext = resolved - .map((e) => `[${e.type} ${e.path}]\n${e.content}`) - .join('\n\n---\n\n') + manifestContext = resolved.map((e) => `[${e.type} ${e.path}]\n${e.content}`).join('\n\n---\n\n') } } } catch { @@ -198,12 +241,12 @@ export class QueryExecutor implements IQueryExecutor { // Query-optimized LLM overrides: tokens and lower temperature const queryOverrides = prefetchedContext - ? { maxIterations: 50, maxTokens: 1024, temperature: 0.3 } - : { maxIterations: 50, maxTokens: 2048, temperature: 0.5 } + ? {maxIterations: 50, maxTokens: 1024, temperature: 0.3} + : {maxIterations: 50, maxTokens: 2048, temperature: 0.5} try { const response = await agent.executeOnSession(taskSessionId, prompt, { - executionContext: { commandType: 'query', ...queryOverrides }, + executionContext: {commandType: 'query', ...queryOverrides}, taskId, }) @@ -212,7 +255,19 @@ export class QueryExecutor implements IQueryExecutor { this.cache.set(query, response, fingerprint) } - return response + ATTRIBUTION_FOOTER + const tier = prefetchedContext ? TIER_OPTIMIZED_LLM : TIER_FULL_AGENTIC + return { + matchedDocs: buildMatchedDocs(searchResult), + response: response + ATTRIBUTION_FOOTER, + searchMetadata: { + cacheFingerprint: fingerprint, + resultCount: searchResult?.results.length ?? 0, + topScore: searchResult?.results[0]?.score ?? 0, + totalFound: searchResult?.totalFound ?? 0, + }, + tier, + timing: {durationMs: Date.now() - startTime}, + } } finally { // Clean up entire task session (sandbox + history) in one call await agent.deleteTaskSession(taskSessionId) @@ -227,9 +282,7 @@ export class QueryExecutor implements IQueryExecutor { private buildPrefetchedContext(searchResult: SearchKnowledgeResult): string | undefined { if (searchResult.totalFound === 0) return undefined - const highConfidenceResults = searchResult.results.filter( - (r) => r.score >= SMART_ROUTING_SCORE_THRESHOLD, - ) + const highConfidenceResults = searchResult.results.filter((r) => r.score >= SMART_ROUTING_SCORE_THRESHOLD) if (highConfidenceResults.length === 0) return undefined @@ -253,13 +306,13 @@ export class QueryExecutor implements IQueryExecutor { query: string, options: { manifestContext?: string - metadata: { hasPreFetched: boolean; resultCount: number; topScore: number; totalFound: number } + metadata: {hasPreFetched: boolean; resultCount: number; topScore: number; totalFound: number} metaVar: string prefetchedContext?: string resultsVar: string }, ): string { - const { manifestContext, metadata, metaVar, prefetchedContext, resultsVar } = options + const {manifestContext, metadata, metaVar, prefetchedContext, resultsVar} = options const groundingRules = `### Grounding Rules (CRITICAL) - ONLY use information from the curated knowledge base (.brv/context-tree/) - If no relevant knowledge is found, respond: "This topic is not covered in the knowledge base." @@ -374,9 +427,36 @@ ${responseFormat}` */ private extractQueryEntities(query: string): string[] { const stopwords = new Set([ - 'a', 'about', 'an', 'and', 'by', 'did', 'do', 'does', 'for', 'from', - 'how', 'in', 'is', 'my', 'of', 'or', 'our', 'that', 'the', 'their', - 'this', 'to', 'was', 'were', 'what', 'when', 'where', 'which', 'who', 'with', + 'a', + 'about', + 'an', + 'and', + 'by', + 'did', + 'do', + 'does', + 'for', + 'from', + 'how', + 'in', + 'is', + 'my', + 'of', + 'or', + 'our', + 'that', + 'the', + 'their', + 'this', + 'to', + 'was', + 'were', + 'what', + 'when', + 'where', + 'which', + 'who', + 'with', ]) const words = query.toLowerCase().split(/\s+/) @@ -401,9 +481,7 @@ ${responseFormat}` try { const entitySearches = await Promise.allSettled( - entities.slice(0, 3).map((entity) => - this.searchService!.search(entity, { limit: 3 }), - ), + entities.slice(0, 3).map((entity) => this.searchService!.search(entity, {limit: 3})), ) // Collect existing paths to deduplicate @@ -456,13 +534,13 @@ ${responseFormat}` let content = result.excerpt try { const ctPath = join(BRV_DIR, CONTEXT_TREE_DIR, result.path) - const { content: fullContent } = await this.fileSystem!.readFile(ctPath) + const {content: fullContent} = await this.fileSystem!.readFile(ctPath) content = fullContent } catch { // Use excerpt if full read fails } - return { content, path: result.path, score: result.score, title: result.title } + return {content, path: result.path, score: result.score, title: result.title} }), ) diff --git a/test/unit/infra/executor/query-executor.test.ts b/test/unit/infra/executor/query-executor.test.ts index c40366c1b..ce920f9a7 100644 --- a/test/unit/infra/executor/query-executor.test.ts +++ b/test/unit/infra/executor/query-executor.test.ts @@ -13,76 +13,327 @@ */ import {expect} from 'chai' +import {restore, type SinonStub, stub} from 'sinon' + +import type {ICipherAgent} from '../../../../src/agent/core/interfaces/i-cipher-agent.js' +import type {IFileSystem} from '../../../../src/agent/core/interfaces/i-file-system.js' +import type {ISearchKnowledgeService, SearchKnowledgeResult} from '../../../../src/agent/infra/sandbox/tools-sdk.js' import {LocalSandbox} from '../../../../src/agent/infra/sandbox/local-sandbox.js' +import { + TIER_DIRECT_SEARCH, + TIER_EXACT_CACHE, + TIER_FULL_AGENTIC, + TIER_FUZZY_CACHE, + TIER_OPTIMIZED_LLM, +} from '../../../../src/server/core/domain/entities/query-log-entry.js' +import {QueryExecutor} from '../../../../src/server/infra/executor/query-executor.js' + +function createMockFileSystem(): IFileSystem { + return { + editFile: stub().resolves({bytesWritten: 0, replacements: 0}), + globFiles: stub().resolves({ + files: [ + {isDirectory: false, modified: new Date(1000), path: 'doc1.md', size: 100}, + {isDirectory: false, modified: new Date(2000), path: 'doc2.md', size: 200}, + ], + ignoredCount: 0, + totalFound: 2, + truncated: false, + }), + initialize: stub().resolves(), + readFile: stub().resolves({ + content: '# Test Document\n\nThis is test content about authentication and security.', + encoding: 'utf8', + }), + searchFiles: stub().resolves({matches: [], message: '', totalMatches: 0}), + writeFile: stub().resolves({bytesWritten: 0}), + } as unknown as IFileSystem +} + +function createMockSearchService( + results: SearchKnowledgeResult['results'] = [], + totalFound?: number, +): ISearchKnowledgeService { + const searchResult: SearchKnowledgeResult = { + message: '', + results, + totalFound: totalFound ?? results.length, + } + return { + search: stub().resolves(searchResult), + } as unknown as ISearchKnowledgeService +} + +function makeSearchResult( + overrides: Partial = {}, +): SearchKnowledgeResult['results'][0] { + return { + excerpt: 'Test excerpt about the topic.', + path: 'topics/auth.md', + score: 0.95, + title: 'Authentication Guide', + ...overrides, + } +} + +describe('QueryExecutor', () => { + describe('sandbox variable naming (regression)', () => { + // Typical UUID taskId with hyphens (as generated by crypto.randomUUID()) + const taskId = '8cd8e2d8-a7fc-4371-89ca-59460687c12d' + // What the LLM would write in code-exec (hyphens → underscores, valid JS identifier) + const llmGeneratedResultsVar = '__query_results_8cd8e2d8_a7fc_4371_89ca_59460687c12d' + const llmGeneratedMetaVar = '__query_meta_8cd8e2d8_a7fc_4371_89ca_59460687c12d' -describe('QueryExecutor - sandbox variable naming (regression)', () => { - // Typical UUID taskId with hyphens (as generated by crypto.randomUUID()) - const taskId = '8cd8e2d8-a7fc-4371-89ca-59460687c12d' - // What the LLM would write in code-exec (hyphens → underscores, valid JS identifier) - const llmGeneratedResultsVar = '__query_results_8cd8e2d8_a7fc_4371_89ca_59460687c12d' - const llmGeneratedMetaVar = '__query_meta_8cd8e2d8_a7fc_4371_89ca_59460687c12d' + describe('bug: hyphenated taskId causes variable name mismatch', () => { + it('should fail with ReferenceError when __query_results_* stored with hyphens', async () => { + const sandbox = new LocalSandbox() - describe('bug: hyphenated taskId causes variable name mismatch', () => { - it('should fail with ReferenceError when __query_results_* stored with hyphens', async () => { - const sandbox = new LocalSandbox() + const buggyResultsVar = `__query_results_${taskId}` + sandbox.updateContext({[buggyResultsVar]: [{path: '/a.md', score: 0.9}]}) - const buggyResultsVar = `__query_results_${taskId}` - sandbox.updateContext({[buggyResultsVar]: [{path: '/a.md', score: 0.9}]}) + const result = await sandbox.execute(llmGeneratedResultsVar) - const result = await sandbox.execute(llmGeneratedResultsVar) + expect(result.stderr).to.include('ReferenceError') + expect(result.stderr).to.include(llmGeneratedResultsVar) + }) - expect(result.stderr).to.include('ReferenceError') - expect(result.stderr).to.include(llmGeneratedResultsVar) + it('should fail with ReferenceError when __query_meta_* stored with hyphens', async () => { + const sandbox = new LocalSandbox() + + const buggyMetaVar = `__query_meta_${taskId}` + sandbox.updateContext({[buggyMetaVar]: {resultCount: 3, topScore: 0.9, totalFound: 10}}) + + const result = await sandbox.execute(`${llmGeneratedMetaVar}.resultCount`) + + expect(result.stderr).to.include('ReferenceError') + }) }) - it('should fail with ReferenceError when __query_meta_* stored with hyphens', async () => { - const sandbox = new LocalSandbox() + describe('fix: taskIdSafe with underscores eliminates mismatch', () => { + it('should succeed when __query_results_* stored with underscores matching LLM output', async () => { + const sandbox = new LocalSandbox() + + const taskIdSafe = taskId.replaceAll('-', '_') + const fixedResultsVar = `__query_results_${taskIdSafe}` + sandbox.updateContext({[fixedResultsVar]: [{path: '/a.md', score: 0.9}]}) - const buggyMetaVar = `__query_meta_${taskId}` - sandbox.updateContext({[buggyMetaVar]: {resultCount: 3, topScore: 0.9, totalFound: 10}}) + const result = await sandbox.execute(`${llmGeneratedResultsVar}[0].score`) - const result = await sandbox.execute(`${llmGeneratedMetaVar}.resultCount`) + expect(result.stderr).to.equal('') + expect(result.returnValue).to.equal(0.9) + }) - expect(result.stderr).to.include('ReferenceError') + it('should succeed when __query_meta_* stored with underscores matching LLM output', async () => { + const sandbox = new LocalSandbox() + + const taskIdSafe = taskId.replaceAll('-', '_') + const fixedMetaVar = `__query_meta_${taskIdSafe}` + sandbox.updateContext({[fixedMetaVar]: {resultCount: 3, topScore: 0.9, totalFound: 10}}) + + const result = await sandbox.execute(`${llmGeneratedMetaVar}.resultCount`) + + expect(result.stderr).to.equal('') + expect(result.returnValue).to.equal(3) + }) + + it('should correctly transform all UUID segments (4 hyphens replaced)', () => { + const taskIdSafe = taskId.replaceAll('-', '_') + + expect(taskIdSafe).to.not.include('-') + expect(taskIdSafe).to.equal('8cd8e2d8_a7fc_4371_89ca_59460687c12d') + + expect(`__query_results_${taskIdSafe}`).to.equal(llmGeneratedResultsVar) + expect(`__query_meta_${taskIdSafe}`).to.equal(llmGeneratedMetaVar) + }) }) }) - describe('fix: taskIdSafe with underscores eliminates mismatch', () => { - it('should succeed when __query_results_* stored with underscores matching LLM output', async () => { - const sandbox = new LocalSandbox() + // ── QueryExecutorResult tier tests ─────────────────────────────────────────── - const taskIdSafe = taskId.replaceAll('-', '_') - const fixedResultsVar = `__query_results_${taskIdSafe}` - sandbox.updateContext({[fixedResultsVar]: [{path: '/a.md', score: 0.9}]}) + /** Attribution footer appended by QueryExecutor to all responses */ + const ATTRIBUTION_FOOTER = '\n\n---\nSource: ByteRover Knowledge Base' - const result = await sandbox.execute(`${llmGeneratedResultsVar}[0].score`) + const TASK_ID = 'test-task-001' + const TASK_SESSION_ID = 'task-session-001' - expect(result.stderr).to.equal('') - expect(result.returnValue).to.equal(0.9) + function createMockAgent(): ICipherAgent { + return { + cancel: stub().resolves(false), + createTaskSession: stub().resolves(TASK_SESSION_ID), + deleteSandboxVariable: stub(), + deleteSandboxVariableOnSession: stub(), + deleteSession: stub().resolves(true), + deleteTaskSession: stub().resolves(), + execute: stub().resolves(''), + executeOnSession: stub().resolves('LLM response'), + generate: stub().resolves({content: '', toolCalls: [], usage: {inputTokens: 0, outputTokens: 0}}), + getSessionMetadata: stub().resolves(), + getState: stub().returns({ + currentIteration: 0, + executionHistory: [], + executionState: 'idle', + toolCallsExecuted: 0, + }), + listPersistedSessions: stub().resolves([]), + reset: stub(), + setSandboxVariable: stub(), + setSandboxVariableOnSession: stub(), + start: stub().resolves(), + stream: stub().resolves({ + [Symbol.asyncIterator]: () => ({next: () => Promise.resolve({done: true, value: undefined})}), + }), + } as unknown as ICipherAgent + } + + describe('executeWithAgent', () => { + afterEach(() => { + restore() }) - it('should succeed when __query_meta_* stored with underscores matching LLM output', async () => { - const sandbox = new LocalSandbox() + describe('Tier 0: exact cache hit', () => { + it('should return tier 0 with empty matchedDocs on exact cache hit', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + // First call: direct search (Tier 2) populates cache + const searchService = createMockSearchService([makeSearchResult({score: 0.95})]) + const executor = new QueryExecutor({enableCache: true, fileSystem, searchService}) + + // First call — goes through to Tier 2 direct search (score 0.95 > 0.93 threshold) + const firstResult = await executor.executeWithAgent(agent, {query: 'what is authentication', taskId: TASK_ID}) + expect(firstResult.tier).to.equal(TIER_DIRECT_SEARCH) + + // Second call — same query, same fingerprint → Tier 0 cache hit + const result = await executor.executeWithAgent(agent, {query: 'what is authentication', taskId: TASK_ID}) + + expect(result.tier).to.equal(TIER_EXACT_CACHE) + expect(result.matchedDocs).to.deep.equal([]) + expect(result.searchMetadata).to.be.undefined + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include(ATTRIBUTION_FOOTER) + }) + }) + + describe('Tier 1: fuzzy cache hit', () => { + it('should return tier 1 with empty matchedDocs on fuzzy cache match', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + const searchService = createMockSearchService([makeSearchResult({score: 0.95})]) + const executor = new QueryExecutor({enableCache: true, fileSystem, searchService}) + + // Prime cache with first query (goes through Tier 2 direct search) + await executor.executeWithAgent(agent, {query: 'authentication security guide overview', taskId: TASK_ID}) + + // Similar query with sufficient token overlap (Jaccard >= 0.6) + // Tokens: "authentication", "security", "guide" overlap; "detailed" and "overview" differ + const result = await executor.executeWithAgent(agent, { + query: 'authentication security guide detailed', + taskId: TASK_ID, + }) + + expect(result.tier).to.equal(TIER_FUZZY_CACHE) + expect(result.matchedDocs).to.deep.equal([]) + expect(result.searchMetadata).to.be.undefined + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include(ATTRIBUTION_FOOTER) + }) + }) + + describe('Tier 2: OOD (out-of-domain)', () => { + it('should return tier 2 with empty matchedDocs when search returns no results', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + const searchService = createMockSearchService([], 0) + const executor = new QueryExecutor({fileSystem, searchService}) + + const result = await executor.executeWithAgent(agent, {query: 'what is quantum computing', taskId: TASK_ID}) + + expect(result.tier).to.equal(TIER_DIRECT_SEARCH) + expect(result.matchedDocs).to.deep.equal([]) + expect(result.searchMetadata).to.deep.equal({resultCount: 0, topScore: 0, totalFound: 0}) + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include('No matching knowledge found') + expect(result.response).to.include(ATTRIBUTION_FOOTER) + }) + }) + + describe('Tier 2: direct search response', () => { + it('should return tier 2 with matchedDocs when direct response threshold met', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + const searchResult = makeSearchResult({path: 'topics/auth.md', score: 0.95, title: 'Authentication Guide'}) + const searchService = createMockSearchService([searchResult]) + const executor = new QueryExecutor({fileSystem, searchService}) + + const result = await executor.executeWithAgent(agent, {query: 'what is authentication', taskId: TASK_ID}) + + expect(result.tier).to.equal(TIER_DIRECT_SEARCH) + expect(result.matchedDocs).to.have.length(1) + expect(result.matchedDocs[0]).to.deep.equal({ + path: 'topics/auth.md', + score: 0.95, + title: 'Authentication Guide', + }) + expect(result.searchMetadata).to.deep.include({resultCount: 1, totalFound: 1}) + expect(result.searchMetadata!.topScore).to.equal(0.95) + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include(ATTRIBUTION_FOOTER) + }) + }) - const taskIdSafe = taskId.replaceAll('-', '_') - const fixedMetaVar = `__query_meta_${taskIdSafe}` - sandbox.updateContext({[fixedMetaVar]: {resultCount: 3, topScore: 0.9, totalFound: 10}}) + describe('Tier 3: optimized LLM with prefetched context', () => { + it('should return tier 3 when search results have high scores and LLM is invoked', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + // Score 0.75: above SMART_ROUTING_SCORE_THRESHOLD (0.7) for prefetch, + // but below DIRECT_RESPONSE_SCORE_THRESHOLD (0.85) so direct search is skipped + const searchResults = [ + makeSearchResult({path: 'topics/auth.md', score: 0.75, title: 'Auth Guide'}), + makeSearchResult({path: 'topics/security.md', score: 0.72, title: 'Security Guide'}), + ] + const searchService = createMockSearchService(searchResults) + // No baseDirectory — avoids FileContextTreeManifestService filesystem access + const executor = new QueryExecutor({fileSystem, searchService}) - const result = await sandbox.execute(`${llmGeneratedMetaVar}.resultCount`) + const result = await executor.executeWithAgent(agent, {query: 'how does authentication work', taskId: TASK_ID}) - expect(result.stderr).to.equal('') - expect(result.returnValue).to.equal(3) + expect(result.tier).to.equal(TIER_OPTIMIZED_LLM) + expect(result.matchedDocs).to.have.length(2) + expect(result.matchedDocs[0]).to.deep.equal({path: 'topics/auth.md', score: 0.75, title: 'Auth Guide'}) + expect(result.matchedDocs[1]).to.deep.equal({path: 'topics/security.md', score: 0.72, title: 'Security Guide'}) + expect(result.searchMetadata).to.deep.include({resultCount: 2, totalFound: 2}) + expect(result.searchMetadata!.topScore).to.equal(0.75) + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include('LLM response') + expect(result.response).to.include(ATTRIBUTION_FOOTER) + expect((agent.executeOnSession as SinonStub).calledOnce).to.be.true + }) }) - it('should correctly transform all UUID segments (4 hyphens replaced)', () => { - const taskIdSafe = taskId.replaceAll('-', '_') + describe('Tier 4: full agentic (no prefetched context)', () => { + it('should return tier 4 when all search scores are below smart routing threshold', async () => { + const agent = createMockAgent() + const fileSystem = createMockFileSystem() + // All scores below SMART_ROUTING_SCORE_THRESHOLD (0.7) → no prefetched context + const searchResults = [ + makeSearchResult({path: 'topics/misc.md', score: 0.5, title: 'Misc Notes'}), + makeSearchResult({path: 'topics/other.md', score: 0.4, title: 'Other'}), + ] + const searchService = createMockSearchService(searchResults) + const executor = new QueryExecutor({fileSystem, searchService}) - expect(taskIdSafe).to.not.include('-') - expect(taskIdSafe).to.equal('8cd8e2d8_a7fc_4371_89ca_59460687c12d') + const result = await executor.executeWithAgent(agent, {query: 'complex multi-step question', taskId: TASK_ID}) - expect(`__query_results_${taskIdSafe}`).to.equal(llmGeneratedResultsVar) - expect(`__query_meta_${taskIdSafe}`).to.equal(llmGeneratedMetaVar) + expect(result.tier).to.equal(TIER_FULL_AGENTIC) + expect(result.matchedDocs).to.have.length(2) + expect(result.matchedDocs[0]).to.deep.equal({path: 'topics/misc.md', score: 0.5, title: 'Misc Notes'}) + expect(result.searchMetadata).to.deep.include({resultCount: 2, totalFound: 2}) + expect(result.searchMetadata!.topScore).to.equal(0.5) + expect(result.timing.durationMs).to.be.at.least(0) + expect(result.response).to.include('LLM response') + expect(result.response).to.include(ATTRIBUTION_FOOTER) + expect((agent.executeOnSession as SinonStub).calledOnce).to.be.true + }) }) }) })