Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 36 additions & 17 deletions agents-api/src/__tests__/run/agents/Agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ const {
getToolsForAgentMock,
getFunctionToolsForSubAgentMock,
buildPersistedMessageContentMock,
createDefaultConversationHistoryConfigMock,
getFormattedConversationHistoryMock,
getConversationHistoryWithCompressionMock,
formatMessagesAsConversationHistoryMock,
} = vi.hoisted(() => {
const getCredentialReferenceMock = vi.fn(() => vi.fn().mockResolvedValue(null));
const getContextConfigByIdMock = vi.fn(() => vi.fn().mockResolvedValue(null));
Expand All @@ -159,6 +163,20 @@ const {
);
const getFunctionToolsForSubAgentMock = vi.fn().mockResolvedValue([]);
const buildPersistedMessageContentMock = vi.fn();
const createDefaultConversationHistoryConfigMock = vi.fn().mockReturnValue({
mode: 'full',
limit: 50,
includeInternal: true,
messageTypes: ['chat'],
maxOutputTokens: 4000,
});
const getFormattedConversationHistoryMock = vi
.fn()
.mockResolvedValue('Mock conversation history');
const getConversationHistoryWithCompressionMock = vi.fn().mockResolvedValue([]);
const formatMessagesAsConversationHistoryMock = vi
.fn()
.mockReturnValue('Mock conversation history');

return {
getCredentialReferenceMock,
Expand All @@ -170,6 +188,10 @@ const {
getToolsForAgentMock,
getFunctionToolsForSubAgentMock,
buildPersistedMessageContentMock,
createDefaultConversationHistoryConfigMock,
getFormattedConversationHistoryMock,
getConversationHistoryWithCompressionMock,
formatMessagesAsConversationHistoryMock,
};
});

Expand Down Expand Up @@ -229,9 +251,10 @@ vi.mock('../../../domains/run/data/conversations', async (importOriginal) => {
const actual = (await importOriginal()) as any;
return {
...actual,
getConversationHistoryWithCompression: vi
.fn()
.mockResolvedValue('Mock conversation history as string'),
createDefaultConversationHistoryConfig: createDefaultConversationHistoryConfigMock,
getFormattedConversationHistory: getFormattedConversationHistoryMock,
getConversationHistoryWithCompression: getConversationHistoryWithCompressionMock,
formatMessagesAsConversationHistory: formatMessagesAsConversationHistoryMock,
};
});

Expand Down Expand Up @@ -362,20 +385,16 @@ vi.mock('../../../domains/run/agents/SystemPromptBuilder.js', () => ({
})),
}));

vi.mock('../../../domains/run/data/conversations.js', () => ({
createDefaultConversationHistoryConfig: vi.fn().mockReturnValue({
mode: 'full',
limit: 50,
includeInternal: true,
messageTypes: ['chat'],
maxOutputTokens: 4000,
}),
getFormattedConversationHistory: vi.fn().mockResolvedValue('Mock conversation history'),
getConversationScopedArtifacts: vi.fn().mockResolvedValue([]),
getConversationHistoryWithCompression: vi
.fn()
.mockResolvedValue('Mock conversation history as string'),
}));
// Partially mock the conversations data module: every real export is kept,
// and the four conversation-history helpers below are replaced by the mocks
// created in the vi.hoisted() factory earlier in this file.
vi.mock('../../../domains/run/data/conversations.js', async (importOriginal) => {
  const realModule = (await importOriginal()) as any;
  const historyOverrides = {
    createDefaultConversationHistoryConfig: createDefaultConversationHistoryConfigMock,
    getFormattedConversationHistory: getFormattedConversationHistoryMock,
    getConversationHistoryWithCompression: getConversationHistoryWithCompressionMock,
    formatMessagesAsConversationHistory: formatMessagesAsConversationHistoryMock,
  };
  // Overrides are spread last so they take precedence over the real exports.
  return { ...realModule, ...historyOverrides };
});

// Import the mocked module - these will automatically be mocked
import {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,15 @@ describe('getConversationHistoryWithCompression — artifact replacement', () =>

const result = await getConversationHistoryWithCompression(baseParams);

expect(result).toContain('Artifact: "Google Doc"');
expect(result).toContain('id: art-1');
expect(result).toContain('args:');
expect(result).toContain('description: Fetched document content');
expect(result).toContain('summary:');
expect(result).not.toContain(rawContent);
const toolResult = result.find((msg) => msg.messageType === 'tool-result');
const toolResultText = toolResult?.content?.text ?? '';

expect(toolResultText).toContain('Artifact: "Google Doc"');
expect(toolResultText).toContain('id: art-1');
expect(toolResultText).toContain('args:');
expect(toolResultText).toContain('description: Fetched document content');
expect(toolResultText).toContain('summary:');
expect(toolResultText).not.toContain(rawContent);
});

it('batches toolCallId lookups in a single getLedgerArtifacts call', async () => {
Expand Down Expand Up @@ -134,7 +137,45 @@ describe('getConversationHistoryWithCompression — artifact replacement', () =>

const result = await getConversationHistoryWithCompression(baseParams);

expect(result).toContain(content);
expect(result).not.toContain('Artifact:');
const toolResult = result.find((msg) => msg.messageType === 'tool-result');
const toolResultText = toolResult?.content?.text ?? '';

expect(toolResultText).toContain(content);
expect(toolResultText).not.toContain('Artifact:');
});

it('preserves all artifact references when multiple artifacts share a toolCallId', async () => {
const messages = [makeToolResultMessage('tc-shared', 'raw tool output')];

mockGetConversationHistory.mockReturnValue(vi.fn().mockResolvedValue(messages));
mockGetLedgerArtifacts.mockReturnValue(
vi.fn().mockResolvedValue([
{
artifactId: 'art-1',
toolCallId: 'tc-shared',
name: 'First',
description: 'First artifact',
parts: [{ kind: 'data', data: { summary: { text: 'one' } } }],
metadata: {},
createdAt: new Date().toISOString(),
},
{
artifactId: 'art-2',
toolCallId: 'tc-shared',
name: 'Second',
description: 'Second artifact',
parts: [{ kind: 'data', data: { summary: { text: 'two' } } }],
metadata: {},
createdAt: new Date().toISOString(),
},
])
);

const result = await getConversationHistoryWithCompression(baseParams);
const toolResult = result.find((msg) => msg.messageType === 'tool-result');
const toolResultText = toolResult?.content?.text ?? '';

expect(toolResultText).toContain('id: art-1');
expect(toolResultText).toContain('id: art-2');
Comment on lines +174 to +179
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test verifies that both artifact IDs are present, but it doesn't assert the newline (`\n`) separator that joins them. If the join separator were accidentally changed to an empty string or a space, this test would still pass. Consider:

expect(toolResultText).toMatch(/id: art-1[\s\S]*\n[\s\S]*id: art-2/);

});
});
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { normalizeMimeType } from '@inkeep/agents-core/constants/allowed-file-fo
import { getLogger } from '../../../../logger';
import {
createDefaultConversationHistoryConfig,
formatMessagesAsConversationHistory,
getConversationHistoryWithCompression,
} from '../../data/conversations';
import {
Expand Down Expand Up @@ -62,7 +63,7 @@ export async function buildConversationHistory(
isDelegated: ctx.isDelegatedAgent,
};

conversationHistory = await getConversationHistoryWithCompression({
const historyMessages = await getConversationHistoryWithCompression({
tenantId: ctx.config.tenantId,
projectId: ctx.config.projectId,
conversationId: contextId,
Expand All @@ -74,8 +75,9 @@ export async function buildConversationHistory(
streamRequestId,
fullContextSize: initialContextBreakdown.total,
});
conversationHistory = formatMessagesAsConversationHistory(historyMessages);
} else if (historyConfig.mode === 'scoped') {
conversationHistory = await getConversationHistoryWithCompression({
const historyMessages = await getConversationHistoryWithCompression({
tenantId: ctx.config.tenantId,
projectId: ctx.config.projectId,
conversationId: contextId,
Expand All @@ -92,6 +94,7 @@ export async function buildConversationHistory(
streamRequestId,
fullContextSize: initialContextBreakdown.total,
});
conversationHistory = formatMessagesAsConversationHistory(historyMessages);
}
}

Expand Down
79 changes: 47 additions & 32 deletions agents-api/src/domains/run/data/conversations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
generateId,
getConversationHistory,
getLedgerArtifacts,
type MessageSelect,
type ResolvedRef,
} from '@inkeep/agents-core';
import { trace } from '@opentelemetry/api';
Expand Down Expand Up @@ -146,7 +147,7 @@ export async function getScopedHistory({
conversationId: string;
filters?: ConversationScopeOptions;
options?: ConversationHistoryConfig;
}): Promise<any[]> {
}): Promise<MessageSelect[]> {
try {
// First, get ALL messages to find the latest compression summary
// IMPORTANT: Always include internal messages and disable truncation to ensure tool results are available
Expand All @@ -173,7 +174,7 @@ export async function getScopedHistory({

const limit = options?.limit;

let messages: any[];
let messages: MessageSelect[];
if (latestCompressionSummary) {
// Get the summary + all messages after it
const summaryDate = new Date(latestCompressionSummary.createdAt);
Expand Down Expand Up @@ -427,7 +428,7 @@ export async function getConversationHistoryWithCompression({
baseModel?: any;
streamRequestId?: string;
fullContextSize?: number;
}): Promise<string> {
}): Promise<MessageSelect[]> {
const historyOptions = options ?? createDefaultConversationHistoryConfig();

// IMPORTANT: For conversation compression, we MUST include internal messages (tool results)
Expand Down Expand Up @@ -459,7 +460,7 @@ export async function getConversationHistoryWithCompression({
}

if (!messagesToFormat.length) {
return '';
return [];
}

// Replace tool-result content with compact artifact references BEFORE compression.
Expand All @@ -476,38 +477,45 @@ export async function getConversationHistoryWithCompression({
scopes: { tenantId, projectId },
toolCallIds,
});
const artifactsByToolCallId = new Map(
artifacts.filter((a) => a.toolCallId).map((a) => [a.toolCallId as string, a])
);
const artifactsByToolCallId = new Map<string, Artifact[]>();
for (const artifact of artifacts) {
if (!artifact.toolCallId) continue;
const existing = artifactsByToolCallId.get(artifact.toolCallId) || [];
existing.push(artifact);
artifactsByToolCallId.set(artifact.toolCallId, existing);
}
if (artifactsByToolCallId.size > 0) {
messagesToFormat = messagesToFormat.map((msg) => {
if (msg.messageType !== 'tool-result') return msg;
const tcId = msg.metadata?.a2a_metadata?.toolCallId;
const artifact = tcId ? artifactsByToolCallId.get(tcId) : undefined;
if (!artifact) return msg;
const relatedArtifacts = tcId ? artifactsByToolCallId.get(tcId) : undefined;
if (!relatedArtifacts || relatedArtifacts.length === 0) return msg;
const toolArgs = msg.metadata?.a2a_metadata?.toolArgs;
const rawArgs = toolArgs ? JSON.stringify(toolArgs) : undefined;
const argsStr =
rawArgs && rawArgs.length > 300 ? `${rawArgs.slice(0, 300)}...[truncated]` : rawArgs;
const dataPart = artifact.parts?.find(
(p): p is Extract<(typeof artifact.parts)[number], { kind: 'data' }> =>
p.kind === 'data'
);
const summaryValue = dataPart?.data?.summary;
const rawSummary = summaryValue ? JSON.stringify(summaryValue) : undefined;
const summaryDataStr =
rawSummary && rawSummary.length > 1000
? `${rawSummary.slice(0, 1000)}...[truncated]`
: rawSummary;
const refParts = [
`Artifact: "${artifact.name ?? artifact.artifactId}" (id: ${artifact.artifactId})`,
];
if (argsStr) refParts.push(`args: ${argsStr}`);
if (artifact.description) refParts.push(`description: ${artifact.description}`);
if (summaryDataStr) refParts.push(`summary: ${summaryDataStr}`);
const refParts = relatedArtifacts.map((artifact) => {
const dataPart = artifact.parts?.find(
(p): p is Extract<(typeof artifact.parts)[number], { kind: 'data' }> =>
p.kind === 'data'
);
const summaryValue = dataPart?.data?.summary;
const rawSummary = summaryValue ? JSON.stringify(summaryValue) : undefined;
const summaryDataStr =
rawSummary && rawSummary.length > 1000
? `${rawSummary.slice(0, 1000)}...[truncated]`
: rawSummary;
const artifactParts = [
`Artifact: "${artifact.name ?? artifact.artifactId}" (id: ${artifact.artifactId})`,
];
if (argsStr) artifactParts.push(`args: ${argsStr}`);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Minor: Args duplicated for each artifact sharing a toolCallId

Issue: When multiple artifacts share the same toolCallId, the argsStr (computed once from message metadata at lines 492-495) gets appended to every artifact's reference string inside the map. Since all artifacts for the same tool call came from the same invocation, they have identical toolArgs, resulting in the same args: {...} appearing N times in the output.

Why: This unnecessarily bloats token count in the conversation history. For tool calls that produce multiple artifacts (e.g., multiple images or documents), the args string could be repeated 2-10x.

Fix: Consider moving args to a single prefix line before the artifact list:

const argsLine = argsStr ? `Tool call args: ${argsStr}\n` : '';
const artifactRefs = relatedArtifacts.map((artifact) => {
  // ... build artifact-specific parts WITHOUT args
});
return {
  ...msg,
  content: { text: argsLine + artifactRefs.join('\n') },
};

Refs:

if (artifact.description) artifactParts.push(`description: ${artifact.description}`);
if (summaryDataStr) artifactParts.push(`summary: ${summaryDataStr}`);
return `[${artifactParts.join(' | ')}]`;
});
return {
...msg,
content: { text: `[${refParts.join(' | ')}]` },
content: { text: refParts.join('\n') },
};
});
}
Expand Down Expand Up @@ -561,7 +569,7 @@ export async function getConversationHistoryWithCompression({
}
}

return formatMessagesAsConversationHistory(messagesToFormat);
return messagesToFormat;
}

/**
Expand Down Expand Up @@ -894,10 +902,12 @@ export function reconstructMessageText(msg: any): string {

return parts
.map((part: any) => {
if (part.type === 'text') {
const partKind = part.kind ?? part.type;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The part.kind ?? part.type fallback handles both A2A protocol shape ({ kind: 'text' }) and legacy shape ({ type: 'text' }). The existing reconstructMessageText tests only cover { type: ... } parts — there's no test coverage for the { kind: ... } shape. Add a test case to verify the fallback works.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💭 Consider: Defensive fallback for part.kind ?? part.type

Issue: This fallback suggests there are two different part schemas in play: the canonical MessageContent.parts type (which uses kind per the schema definitions) and some other format that uses type. The fallback is reasonable defensive code, but there's no documentation explaining when each variant occurs.

Why: Without clarity on the source of type-keyed parts, this could mask upstream inconsistencies rather than solve a real polymorphism need. The canonical types in utility.ts and a2a.ts both use kind.

Fix: If both are legitimate variants (e.g., legacy data or external formats), add a brief inline comment:

// Support both 'kind' (canonical schema) and 'type' (legacy/external format)
const partKind = part.kind ?? part.type;

If type is never expected, consider logging when encountered to track occurrences.


if (partKind === 'text') {
return part.text ?? '';
}
if (part.type === 'data') {
if (partKind === 'data') {
try {
const data = typeof part.data === 'string' ? JSON.parse(part.data) : part.data;
if (data?.artifactId && data?.toolCallId) {
Expand All @@ -912,9 +922,9 @@ export function reconstructMessageText(msg: any): string {
.join('');
}

function formatMessagesAsConversationHistory(messages: any[]): string {
export function formatMessagesAsConversationHistory(messages: MessageSelect[]): string {
const formattedHistory = messages
.map((msg: any) => {
.map((msg: MessageSelect) => {
let roleLabel: string;

if (msg.role === 'user') {
Expand All @@ -939,8 +949,13 @@ function formatMessagesAsConversationHistory(messages: any[]): string {
roleLabel = msg.role || 'system';
}

return `${roleLabel}: """${reconstructMessageText(msg)}"""`;
const reconstructedMessage = reconstructMessageText(msg);
if (!reconstructedMessage) {
return null;
}
return `${roleLabel}: """${reconstructedMessage}"""`;
})
.filter((line): line is string => line !== null)
Comment on lines +952 to +958
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatMessagesAsConversationHistory now filters out messages where reconstructMessageText returns empty string — this is new behavior (previously all messages were formatted). A message with only non-text/non-artifact parts (e.g. file parts) would be silently dropped. This needs a direct test, and it's worth confirming this is the desired behavior for all message types.

Comment on lines +952 to +958
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💭 Consider: Behavioral change - empty messages now filtered out

Issue: The new implementation filters out messages with empty reconstructed text (returning null then filtering). The previous implementation would include these as roleLabel: """""". This is arguably an improvement, but goes beyond the stated refactoring goal.

Why: This could affect downstream consumers that rely on message boundaries (e.g., compression logic counting messages) or debugging traces. If intentional, it's worth documenting.

Fix: If intentional, consider adding a test case validating the filtering behavior and noting this in the PR description as a deliberate improvement.

.join('\n');

return `<conversation_history>\n${formattedHistory}\n</conversation_history>\n`;
Comment on lines 925 to 961
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The newly exported formatMessagesAsConversationHistory differs from the inline formatting in getFormattedConversationHistory (around line 376) in two meaningful ways:

  1. This function uses reconstructMessageText(msg) (handles multi-part content with artifact refs), while the other uses msg.content.text directly — silently dropping artifact reference tags from multi-part messages.
  2. This function filters out messages with empty reconstructed text, while the other includes them (producing role: """""" entries).

Since getFormattedConversationHistory is still actively called from AgentSession.ts, these inconsistencies produce different conversation history formats depending on the code path. Consider refactoring getFormattedConversationHistory to delegate to this exported function.

Expand Down
Loading
Loading