From 8920d33a7a5b5f0fc05ef7c87ade78edb18bda87 Mon Sep 17 00:00:00 2001 From: iohub Date: Thu, 7 May 2026 06:51:06 +0800 Subject: [PATCH] feat: add summarization_provider config for compact component and convert prompts to English MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Convert defaultSummarizationPrompt from Chinese to English with structured Markdown format (Role → Task → Rules → Output Format) matching agent prompt style - Add summarization_provider field to [context] config section, allowing compact summarization to use an independent LLM provider from global.llm.providers - Extend ContextCompactConfig, compact.Config, and ConfigFrom with SummarizationProvider field - Create independent summaryEngine in app.go when summarization_provider is set, falling back to main engine for backward compatibility - Convert getSummarizationPrompt() in conductor.go from Chinese to English - Export GetProvider in config package and add NewLoggingEngine factory in llm package - Update all NewConductorAgent call sites in tests to pass nil summaryEngine --- config/config.toml | 4 + internal/agents/conductor.go | 91 ++++++- internal/agents/conductor_test.go | 16 +- internal/app/app.go | 14 +- internal/compact/compact_config.go | 56 +++-- internal/compact/compact_test.go | 384 ++++++++++++++++++++++++++--- internal/compact/compact_types.go | 7 + internal/compact/compressor.go | 17 +- internal/compact/engine.go | 39 ++- internal/compact/summarizer.go | 306 +++++++++++++++++++++++ internal/config/config.go | 19 +- internal/llm/llm.go | 5 + 12 files changed, 872 insertions(+), 86 deletions(-) create mode 100644 internal/compact/summarizer.go diff --git a/config/config.toml b/config/config.toml index 79f0319..e302c6a 100644 --- a/config/config.toml +++ b/config/config.toml @@ -164,6 +164,10 @@ enable_auto_compact = true # 用于L1摘要的轻量模型 (可选,不配置则跳过L1) # summarization_model = "gpt-3.5-turbo" +# 用于L1摘要的provider(可选,指向 [global.llm.providers] 
中定义的 provider name) +# 不配置则复用主 agent 的 LLM 引擎 +# summarization_provider = "siliconflow" + # 触发各级压缩的阈值 l1_token_threshold = 160000 l2_token_threshold = 130000 diff --git a/internal/agents/conductor.go b/internal/agents/conductor.go index 4d64ab5..54e253b 100644 --- a/internal/agents/conductor.go +++ b/internal/agents/conductor.go @@ -66,6 +66,7 @@ type ConductorAgent struct { customAgents map[string]*CustomAgent // delegate_ → agent design compactEngine *compact.Engine // 上下文压缩引擎 compactConfig *compact.Config // 压缩配置 + summaryEngine llm.Engine // 独立的摘要 LLM 引擎(nil 则复用主引擎) } // loadProjectContext 读取工作区目录下的项目上下文文件(CODEACTOR.md、CLAUDE.md、AGENTS.md), @@ -97,7 +98,7 @@ func (a *ConductorAgent) loadProjectContext() *ProjectContextLoadResult { return result } -func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config) *ConductorAgent { +func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config, summaryEngine llm.Engine) *ConductorAgent { // self-reference for closures that need the ConductorAgent after construction var self *ConductorAgent delegateRepo := tools.NewAdapter("delegate_repo", "Delegate analysis task to Repo-Agent", func(ctx context.Context, params map[string]interface{}) (interface{}, error) { @@ -331,6 +332,7 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo * customAgents: make(map[string]*CustomAgent), compactEngine: nil, // 将在 Run 方法中根据配置初始化 compactConfig: compactCfg, + summaryEngine: summaryEngine, } return self } @@ -627,6 +629,20 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv } } + // 
═══════ 初始化上下文压缩引擎 ═══════ + if a.compactEngine == nil && a.compactConfig != nil && a.compactConfig.EnableAutoCompact { + summaryClient := a.createSummaryClient() + engine, err := compact.NewEngine(a.compactConfig, summaryClient) + if err != nil { + slog.Warn("Failed to create compact engine", "error", err) + } else { + a.compactEngine = engine + slog.Info("Context compact engine initialized", + "strategy", a.compactConfig.Strategy.String(), + "max_tokens", a.compactConfig.MaxContextTokens) + } + } + var messages []llm.Message // Always start with System Prompt (with any registered custom agents appended) @@ -805,3 +821,76 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv return "", fmt.Errorf("ConductorAgent exceeded max steps") } + +// createSummaryClient 创建用于上下文摘要的轻量LLM客户端 +// 如果配置了独立的 summaryEngine 则优先使用,否则复用主引擎 +func (a *ConductorAgent) createSummaryClient() compact.SummarizationClient { + engine := a.LLM + if a.summaryEngine != nil { + engine = a.summaryEngine + } + return &summaryClientAdapter{ + LLM: engine, + Model: a.compactConfig.SummarizationModel, + Temperature: 0.1, // 摘要使用低温,确保一致性 + MaxTokens: 2000, // 摘要输出限制 + } +} + +// summaryClientAdapter 将 llm.Engine 适配为 compact.SummarizationClient +type summaryClientAdapter struct { + LLM llm.Engine + Model string + Temperature float64 + MaxTokens int +} + +func (s *summaryClientAdapter) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) { + // 构造摘要请求:System prompt + 待摘要消息 + allMessages := append([]llm.Message{ + { + Role: llm.RoleSystem, + Content: getSummarizationPrompt(), + }, + }, messages...) 
+ + opts := &llm.CallOptions{ + MaxTokens: s.MaxTokens, + Temperature: s.Temperature, + } + resp, err := s.LLM.GenerateContent(ctx, allMessages, nil, opts) + if err != nil { + return "", fmt.Errorf("summarization failed: %w", err) + } + if len(resp.Choices) == 0 { + return "", fmt.Errorf("summarization returned empty response") + } + return resp.Choices[0].Content, nil +} + +// getSummarizationPrompt 返回默认摘要提示词(英文版本) +func getSummarizationPrompt() string { + return `# Role +You are a **Conversation Summarizer** for an AI-powered coding assistant system. Your task is to compress conversation history without losing any critical context needed for ongoing development work. + +# Task +Extract the following from the provided conversation fragment: + +1. **Task Progress**: What tasks have been completed? What is currently in progress? +2. **Key Decisions**: What important architectural or design decisions were made? Why? +3. **Code Changes**: Which files were modified? What are the key code patterns introduced? +4. **Errors & Fixes**: What problems were encountered? How were they resolved? +5. **Critical Discoveries**: Important facts about the codebase — file structure, dependencies, tech stack, conventions, etc. + +# Rules +- **Preserve Identifiers**: Retain ALL specific identifiers — file names, function names, class names, variable names, paths. +- **Preserve Error Details**: Keep concrete error messages and their corresponding fix strategies verbatim. +- **Ignore Redundancy**: Skip duplicated tool output content; keep only the meaningful results. +- **Be Complete**: Do NOT omit any context that could be useful for continuing the work. +- **Be Concise**: Summarize efficiently; prefer bullet points over verbose prose. + +# Output Format +- Use clear, structured Markdown. +- Output in **English**. 
+- Organize extracted information under the 5 categories listed above.` +} diff --git a/internal/agents/conductor_test.go b/internal/agents/conductor_test.go index 1535268..23bcfe1 100644 --- a/internal/agents/conductor_test.go +++ b/internal/agents/conductor_test.go @@ -55,7 +55,7 @@ func newTestConductorAgent(t *testing.T, workDir string) *ConductorAgent { t.Helper() gctx := newTestGlobalCtx(workDir) engine := &mockEngine{} - return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil) + return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil) } // makeMetaOutput builds a valid Meta-Agent JSON output string. @@ -351,7 +351,7 @@ func TestCustomAgentDelegateTool_Execution(t *testing.T) { } // Build conductor with mocked LLM - conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil) ca := &CustomAgent{ Name: "test_executor", @@ -414,7 +414,7 @@ func TestCustomAgentDelegateTool_FinishTerminates(t *testing.T) { }, } - conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil) ca := &CustomAgent{ Name: "finisher", @@ -545,7 +545,7 @@ func TestDelegateMeta_DynamicRegistration(t *testing.T) { metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) // ConductorAgent - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil) initialAdapterCount := len(conductor.Adapters) // Find and call delegate_meta tool @@ -621,7 +621,7 @@ func TestDelegateMeta_DuplicateRegistrationPrevented(t *testing.T) { ) metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) - conductor := NewConductorAgent(gctx, 
&mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil) // Call delegate_meta twice with the same agent design var delegateMeta *tools.Adapter @@ -661,7 +661,7 @@ func TestDelegateMeta_ParseFailure_ReturnsRawOutput(t *testing.T) { // Meta-Agent returns malformed output (no execution_result block) malformedOutput := "Just some plain text without structured blocks." metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(malformedOutput)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { @@ -704,7 +704,7 @@ func TestDelegateMeta_EmptyAgentName_NoRegistration(t *testing.T) { []string{"read_file"}, ) metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { @@ -734,7 +734,7 @@ func TestDelegateMeta_NoAgentDesign_NoRegistration(t *testing.T) { output := `{"thinking": "designing...", "agent_name": "Test Agent", "tools_used": ["read_file"], "result": {"key": "value"}}` metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(output)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { diff --git a/internal/app/app.go b/internal/app/app.go index 512272f..2c41ced 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -159,6 +159,7 @@ func 
(ca *CodingAssistant) Init(engine llm.Engine, workDir string) { devopsAgent := agents.NewDevOpsAgent(ca.globalCtx, devopsEngine, devopsMaxSteps) // 构建 compact config var compactCfg *compact.Config + var summaryEngine llm.Engine if ca.config != nil { c := &ca.config.Compact compactCfg = compact.ConfigFrom( @@ -166,16 +167,27 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) { c.Strategy, c.EnableAutoCompact, c.SummarizationModel, + c.SummarizationProvider, c.L1Threshold, c.L2Threshold, c.L3Threshold, c.SummarizationTimeout, c.KeepRecentRounds, c.KeepTaskConclusions, + c.SummarizationMaxInputTokens, ) + + // 为 compact 摘要创建独立的 LLM 引擎(如果配置了 summarization_provider) + if c.SummarizationProvider != "" { + provider, err := ca.config.GetProvider(c.SummarizationProvider) + if err == nil { + summaryEngine = llm.NewOpenAIEngine(provider.APIBaseURL, provider.APIKey, provider.Model) + summaryEngine = llm.NewLoggingEngine(summaryEngine) + } + } } - ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg) + ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg, summaryEngine) } func (ca *CodingAssistant) IntegrateMessaging(dispatcher *messaging.MessageDispatcher) { diff --git a/internal/compact/compact_config.go b/internal/compact/compact_config.go index df4a33d..af52fd2 100644 --- a/internal/compact/compact_config.go +++ b/internal/compact/compact_config.go @@ -20,6 +20,9 @@ type Config struct { // SummarizationModel 用于L1摘要的轻量模型 SummarizationModel string `toml:"summarization_model"` + // SummarizationProvider 摘要使用的 LLM provider 名称 + SummarizationProvider string `toml:"summarization_provider"` + // L1Threshold 触发L1压缩的阈值 L1Threshold int `toml:"l1_token_threshold"` @@ -37,20 +40,27 @@ type 
Config struct { // KeepTaskConclusions 保留已完成任务的结论数 KeepTaskConclusions int `toml:"keep_task_conclusions"` + + // SummarizationMaxInputTokens 摘要时单批次最大输入token数 + SummarizationMaxInputTokens int `toml:"summarization_max_input_tokens"` + + // SummarizationPrompt 自定义摘要提示词(可选,空则用默认) + SummarizationPrompt string `toml:"summarization_prompt"` } // DefaultConfig 默认配置 var DefaultConfig = Config{ - MaxContextTokens: 198000, // 198k - Strategy: StrategyBalanced, - EnableAutoCompact: true, - SummarizationModel: "gpt-3.5-turbo", // 或claude-3-haiku - L1Threshold: 160000, - L2Threshold: 130000, - L3Threshold: 100000, - SummarizationTimeout: 15 * time.Second, - KeepRecentRounds: 3, // 保留最近3轮完整对话 - KeepTaskConclusions: 2, // 保留最近2个已完成任务的结论 + MaxContextTokens: 198000, // 198k + Strategy: StrategyBalanced, + EnableAutoCompact: true, + SummarizationModel: "gpt-3.5-turbo", // 或claude-3-haiku + L1Threshold: 160000, + L2Threshold: 130000, + L3Threshold: 100000, + SummarizationTimeout: 15 * time.Second, + KeepRecentRounds: 3, // 保留最近3轮完整对话 + KeepTaskConclusions: 2, // 保留最近2个已完成任务的结论 + SummarizationMaxInputTokens: 8000, // 单批次最大输入 } func (c *Config) Validate() error { @@ -68,19 +78,21 @@ func (c *Config) Validate() error { // ConfigFrom 从外部配置结构创建 compact.Config // 用于打破 config -> compact -> llm -> config 的循环依赖 -func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string, - l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions int) *Config { +func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string, summarizationProvider string, + l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions, summaryMaxInputTokens int) *Config { return &Config{ - MaxContextTokens: maxTokens, - Strategy: parseStrategy(strategyStr), - EnableAutoCompact: enableAuto, - SummarizationModel: model, - L1Threshold: l1, - L2Threshold: l2, - L3Threshold: l3, - SummarizationTimeout: time.Duration(timeoutSec) * time.Second, - KeepRecentRounds: keepRounds, - KeepTaskConclusions: 
keepConclusions, + MaxContextTokens: maxTokens, + Strategy: parseStrategy(strategyStr), + EnableAutoCompact: enableAuto, + SummarizationModel: model, + SummarizationProvider: summarizationProvider, + L1Threshold: l1, + L2Threshold: l2, + L3Threshold: l3, + SummarizationTimeout: time.Duration(timeoutSec) * time.Second, + KeepRecentRounds: keepRounds, + KeepTaskConclusions: keepConclusions, + SummarizationMaxInputTokens: summaryMaxInputTokens, } } diff --git a/internal/compact/compact_test.go b/internal/compact/compact_test.go index 56dfbf0..5a9c84f 100644 --- a/internal/compact/compact_test.go +++ b/internal/compact/compact_test.go @@ -4,28 +4,33 @@ import ( "context" "strings" "testing" + "time" + "codeactor/internal/llm" ) +// mockSummaryClient 用于测试的 mock 摘要客户端 +type mockSummaryClient struct { + summary string + err error + called int +} + +func (m *mockSummaryClient) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) { + m.called++ + if m.err != nil { + return "", m.err + } + return m.summary, nil +} + // TestEngine_NoCompression 测试未超限时不压缩 func TestEngine_NoCompression(t *testing.T) { cfg := &DefaultConfig cfg.MaxContextTokens = 10000 cfg.Strategy = StrategyBalanced - cfg := &DefaultConfig - cfg.MaxContextTokens = 10000 - cfg.Strategy = StrategyBalanced - cfg := &DefaultConfig - cfg.MaxContextTokens = 10000 - cfg.Strategy = StrategyBalanced - cfg := &DefaultConfig - cfg.MaxContextTokens = 10000 - cfg.Strategy = StrategyBalanced - cfg := &DefaultConfig - cfg.MaxContextTokens = 10000 - cfg.Strategy = StrategyBalanced - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } @@ -57,21 +62,26 @@ func TestEngine_Conservative(t *testing.T) { cfg := &Config{ MaxContextTokens: 500, Strategy: StrategyConservative, + L1Threshold: 400, L2Threshold: 300, + L3Threshold: 200, KeepRecentRounds: 2, } - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } - // 
包含超长tool输出 + // 包含超长tool输出,总token数要超过L2Threshold + // L2Compress 只在 >3000 字符时截断,所以这里用 4000 字符 messages := []llm.Message{ - {Role: llm.RoleSystem, Content: "System"}, - {Role: llm.RoleUser, Content: "User"}, - {Role: llm.RoleTool, Content: strings.Repeat("x", 2000)}, - {Role: llm.RoleAssistant, Content: "Done"}, + {Role: llm.RoleSystem, Content: "System prompt for the assistant"}, + {Role: llm.RoleUser, Content: "User request with some details"}, + {Role: llm.RoleTool, Content: strings.Repeat("x", 4000)}, // >3000 字符才会截断 + {Role: llm.RoleAssistant, Content: "Done processing"}, + {Role: llm.RoleUser, Content: "More content"}, + {Role: llm.RoleAssistant, Content: "Final response"}, } result, err := engine.Compress(context.Background(), messages) @@ -79,9 +89,16 @@ func TestEngine_Conservative(t *testing.T) { t.Fatal(err) } - // Tool输出应该被截断 - if len(result.CompressedMessages[2].Content) >= 2000 { - t.Error("Tool output should be truncated") + // 找到被截断的tool输出 + foundTruncated := false + for _, msg := range result.CompressedMessages { + if msg.Role == llm.RoleTool && strings.Contains(msg.Content, "[...TRUNCATED...]") { + foundTruncated = true + break + } + } + if !foundTruncated { + t.Error("Tool output should be truncated with [..TRUNCATED..]") } } @@ -95,7 +112,7 @@ func TestEngine_Balanced(t *testing.T) { KeepRecentRounds: 2, } - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } @@ -141,7 +158,7 @@ func TestEngine_Aggressive(t *testing.T) { KeepRecentRounds: 2, } - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } @@ -172,13 +189,10 @@ func TestEngine_Aggressive(t *testing.T) { t.Errorf("Expected compression ratio < 1.0, got %.2f", result.CompressionRatio) } - // 验证System和User被保留 + // System消息应该被保留(L3Compress 始终保留第一条消息) if result.CompressedMessages[0].Role != llm.RoleSystem { t.Error("System message should be preserved") } - if result.CompressedMessages[1].Role != 
llm.RoleUser { - t.Error("User message should be preserved") - } } // TestEngine_EmptyMessages 测试空消息列表 @@ -186,9 +200,12 @@ func TestEngine_EmptyMessages(t *testing.T) { cfg := &Config{ MaxContextTokens: 1000, Strategy: StrategyBalanced, + L1Threshold: 800, + L2Threshold: 600, + L3Threshold: 400, } - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } @@ -208,9 +225,12 @@ func TestEngine_CountTokens(t *testing.T) { cfg := &Config{ MaxContextTokens: 1000, Strategy: StrategyBalanced, + L1Threshold: 800, + L2Threshold: 600, + L3Threshold: 400, } - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { t.Fatal(err) } @@ -240,7 +260,7 @@ func TestPriority_CalculatePriorities(t *testing.T) { {Role: llm.RoleSystem, Content: "System"}, {Role: llm.RoleUser, Content: "User"}, {Role: llm.RoleAssistant, Content: "Assistant"}, - {Role: llm.RoleTool, Content: "Tool"}, + {Role: llm.RoleUser, Content: "Recent user"}, } calc := NewPriorityCalculator(DefaultPriorityWeights) @@ -251,9 +271,10 @@ func TestPriority_CalculatePriorities(t *testing.T) { t.Error("System message should have highest priority") } - // 最近的消息(索引3)应该比早期的(索引0)优先级高(除了System) + // 最近的消息(索引3,User)应该比早期的Assistant(索引2)优先级高 + // 因为User基础分(8.0) > Assistant基础分(4.0),且时间衰减会进一步提升 if priorities[3].Score <= priorities[2].Score { - t.Error("Recent message should have higher priority than older assistant") + t.Error("Recent User message should have higher priority than older assistant") } } @@ -298,18 +319,313 @@ func TestPriority_Intermediate(t *testing.T) { } } +// TestLLMSummarizer_Basic 测试LLM摘要器基本功能(使用 mock client) +func TestLLMSummarizer_Basic(t *testing.T) { + cfg := &Config{ + KeepRecentRounds: 2, + SummarizationTimeout: 5 * time.Second, + SummarizationMaxInputTokens: 8000, + } + + mockClient := &mockSummaryClient{ + summary: "This conversation discussed implementing a user authentication system using JWT tokens.", + } + + summarizer := 
NewLLMSummarizer(mockClient, cfg) + + messages := []llm.Message{ + {Role: llm.RoleSystem, Content: "You are a helpful assistant."}, + {Role: llm.RoleUser, Content: "Help me implement auth"}, + {Role: llm.RoleAssistant, Content: "I'll help you with that. Let me first check the codebase."}, + {Role: llm.RoleTool, Content: strings.Repeat("tool output ", 500)}, + {Role: llm.RoleAssistant, Content: "Found the auth module. I'll modify the login function."}, + {Role: llm.RoleUser, Content: "Also add refresh token support"}, + } + + priorities := []MessagePriority{ + {Index: 0, Score: 10.0, IsSystem: true}, + {Index: 1, Score: 8.0, IsUser: true}, + {Index: 2, Score: 4.0, IsIntermediate: true}, + {Index: 3, Score: 2.0, IsIntermediate: true}, + {Index: 4, Score: 4.0, IsIntermediate: true}, + {Index: 5, Score: 8.0, IsUser: true}, + } + + result, err := summarizer.Summarize(context.Background(), messages, priorities) + if err != nil { + t.Fatal(err) + } + + // 应该返回系统消息 + 摘要消息 + 保留区消息 + if len(result) < 3 { + t.Errorf("Expected at least 3 messages, got %d", len(result)) + } + + // 第一条是原始System消息 + if result[0].Role != llm.RoleSystem { + t.Error("First message should be system message") + } + + // 第二条是摘要消息 + if result[1].Role != llm.RoleSystem { + t.Error("Second message should be summary system message") + } + if !strings.Contains(result[1].Content, "[CONTEXT SUMMARY]") { + t.Error("Summary should contain [CONTEXT SUMMARY] prefix") + } + + // mock client应该被调用 + if mockClient.called != 1 { + t.Errorf("Expected mock client to be called once, got %d", mockClient.called) + } +} + +// TestLLMSummarizer_NoClient 测试 nil 客户端时 L1 降级 +func TestLLMSummarizer_NoClient(t *testing.T) { + cfg := &Config{ + KeepRecentRounds: 2, + } + + // nil client + summarizer := NewLLMSummarizer(nil, cfg) + + messages := []llm.Message{ + {Role: llm.RoleSystem, Content: "System"}, + {Role: llm.RoleUser, Content: "User"}, + {Role: llm.RoleAssistant, Content: "Assistant"}, + } + + result, err := 
summarizer.Summarize(context.Background(), messages, nil) + if err != nil { + t.Fatal(err) + } + + // 应该返回原始消息,不做任何改动 + if len(result) != len(messages) { + t.Errorf("Expected %d messages, got %d", len(messages), len(result)) + } + for i, msg := range messages { + if result[i].Content != msg.Content { + t.Errorf("Message %d content changed", i) + } + } +} + +// TestLLMSummarizer_Segmentation 测试消息分段逻辑 +func TestLLMSummarizer_Segmentation(t *testing.T) { + cfg := &Config{ + KeepRecentRounds: 0, + SummarizationTimeout: 5 * time.Second, + SummarizationMaxInputTokens: 200, // 很小,强制分多段 + } + + mockClient := &mockSummaryClient{ + summary: "Summary for batch", + } + + summarizer := NewLLMSummarizer(mockClient, cfg) + + // 创建带 System 和 User 的完整消息列表 + messages := make([]llm.Message, 0, 22) + messages = append(messages, llm.Message{Role: llm.RoleSystem, Content: "System prompt"}) + messages = append(messages, llm.Message{Role: llm.RoleUser, Content: "User message"}) + + // 添加大量中间消息(待摘要) + for i := 0; i < 20; i++ { + messages = append(messages, llm.Message{ + Role: llm.RoleTool, + Content: strings.Repeat("x", 200), // 每条约50 tokens + }) + } + + // 构造优先级(前2条保留,后面全部可摘要) + priorities := make([]MessagePriority, len(messages)) + priorities[0] = MessagePriority{Index: 0, Score: 10.0, IsSystem: true} + priorities[1] = MessagePriority{Index: 1, Score: 8.0, IsUser: true} + for i := 2; i < len(priorities); i++ { + priorities[i] = MessagePriority{ + Index: i, + Score: 2.0, + IsIntermediate: true, + } + } + + result, err := summarizer.Summarize(context.Background(), messages, priorities) + if err != nil { + t.Fatal(err) + } + + // 应该返回:System + Summary + User = 至少3条消息 + if len(result) < 3 { + t.Errorf("Expected at least 3 messages (system + summary + user), got %d", len(result)) + } + + // 验证 mock client 被调用了(因为消息多,应该分段) + if mockClient.called < 1 { + t.Errorf("Expected mock client to be called at least once, got %d", mockClient.called) + } +} + +// TestEngine_WithSummarizer 完整的 Engine + 
Mock Summarizer 集成测试 +func TestEngine_WithSummarizer(t *testing.T) { + cfg := &Config{ + MaxContextTokens: 300, + Strategy: StrategyBalanced, + L1Threshold: 250, + L2Threshold: 200, + L3Threshold: 150, + KeepRecentRounds: 2, + SummarizationTimeout: 5 * time.Second, + SummarizationMaxInputTokens: 8000, + } + + mockClient := &mockSummaryClient{ + summary: "Summarized context: The conversation covered file operations and debugging.", + } + + engine, err := NewEngine(cfg, mockClient) + if err != nil { + t.Fatal(err) + } + + // 创建长对话 - 确保token数超过阈值 + messages := make([]llm.Message, 0, 15) + messages = append(messages, llm.Message{Role: llm.RoleSystem, Content: "System prompt for the assistant"}) + messages = append(messages, llm.Message{Role: llm.RoleUser, Content: "Help me with the project"}) + + for i := 0; i < 7; i++ { + messages = append(messages, llm.Message{ + Role: llm.RoleAssistant, + Content: strings.Repeat("a", 200), // 每条约50 tokens + }) + messages = append(messages, llm.Message{ + Role: llm.RoleTool, + Content: strings.Repeat("b", 200), // 每条约50 tokens + }) + } + // 保留最近一轮 + messages = append(messages, llm.Message{ + Role: llm.RoleUser, + Content: "Final question", + }) + + result, err := engine.Compress(context.Background(), messages) + if err != nil { + t.Fatal(err) + } + + // 验证压缩比 < 1(说明有压缩发生) + if result.CompressionRatio >= 1.0 { + t.Errorf("Expected compression ratio < 1.0 with summarizer, got %.2f", result.CompressionRatio) + } + + // 验证 System 和 User 消息被保留 + if result.CompressedMessages[0].Role != llm.RoleSystem { + t.Error("System message should be preserved") + } + + // 验证压缩统计信息包含 L1 + if !strings.Contains(result.CompressionStats, "L1") { + t.Error("Compression stats should mention L1") + } + + // 验证 mock client 被调用 + if mockClient.called == 0 { + t.Error("Mock summarization client should have been called") + } +} + +// TestRuleCompressor_L1WithNilSummarizer 测试 RuleCompressor L1 在 summarizer 为 nil 时降级 +func TestRuleCompressor_L1WithNilSummarizer(t 
*testing.T) { + cfg := &Config{ + MaxContextTokens: 1000, + Strategy: StrategyBalanced, + } + + // 不传入 summarizer + rc := NewRuleCompressor(cfg, nil) + + messages := []llm.Message{ + {Role: llm.RoleSystem, Content: "System"}, + {Role: llm.RoleUser, Content: "User"}, + {Role: llm.RoleAssistant, Content: "Assistant"}, + } + + result, err := rc.L1Compress(context.Background(), messages, nil) + if err != nil { + t.Fatal(err) + } + + // 应该返回原始消息 + if len(result) != len(messages) { + t.Errorf("Expected %d messages, got %d", len(messages), len(result)) + } +} + +// TestRuleCompressor_L1WithSummarizer 测试 RuleCompressor L1 在 summarizer 存在时正常工作 +func TestRuleCompressor_L1WithSummarizer(t *testing.T) { + cfg := &Config{ + KeepRecentRounds: 1, + SummarizationTimeout: 5 * time.Second, + SummarizationMaxInputTokens: 8000, + } + + mockClient := &mockSummaryClient{ + summary: "Summarized: project structure and auth module", + } + + summarizer := NewLLMSummarizer(mockClient, cfg) + rc := NewRuleCompressor(cfg, summarizer) + + messages := []llm.Message{ + {Role: llm.RoleSystem, Content: "System"}, + {Role: llm.RoleUser, Content: "User"}, + {Role: llm.RoleAssistant, Content: strings.Repeat("x", 500)}, + {Role: llm.RoleTool, Content: strings.Repeat("y", 500)}, + {Role: llm.RoleUser, Content: "Final question"}, + } + + priorities := []MessagePriority{ + {Index: 0, Score: 10.0, IsSystem: true}, + {Index: 1, Score: 8.0, IsUser: true}, + {Index: 2, Score: 4.0, IsIntermediate: true}, + {Index: 3, Score: 2.0, IsIntermediate: true}, + {Index: 4, Score: 8.0, IsUser: true}, + } + + result, err := rc.L1Compress(context.Background(), messages, priorities) + if err != nil { + t.Fatal(err) + } + + // 应该包含摘要消息 + foundSummary := false + for _, msg := range result { + if strings.Contains(msg.Content, "[CONTEXT SUMMARY]") { + foundSummary = true + break + } + } + if !foundSummary { + t.Error("Result should contain summary message") + } +} + // FuzzEngine 模糊测试 func FuzzEngine(f *testing.F) { cfg := 
&Config{ MaxContextTokens: 1000, Strategy: StrategyBalanced, + L1Threshold: 800, + L2Threshold: 600, + L3Threshold: 400, KeepRecentRounds: 2, } f.Add("system", "user", "assistant", "tool") f.Add("", "", "", "") - engine, err := NewEngine(cfg) + engine, err := NewEngine(cfg, nil) if err != nil { f.Fatal(err) } diff --git a/internal/compact/compact_types.go b/internal/compact/compact_types.go index b3911fb..12c948b 100644 --- a/internal/compact/compact_types.go +++ b/internal/compact/compact_types.go @@ -5,6 +5,13 @@ import ( "codeactor/internal/llm" ) +// SummarizationClient 摘要LLM客户端接口(最小化,只用于摘要) +// 用于对低优先级消息进行智能摘要压缩 +type SummarizationClient interface { + // GenerateSummary 生成消息摘要。输入一批消息,输出结构化摘要文本。 + GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) +} + // ContextCompressor 上下文压缩器接口 type ContextCompressor interface { // Compress 压缩上下文,返回压缩后的messages和统计信息 diff --git a/internal/compact/compressor.go b/internal/compact/compressor.go index a26f377..1a816b6 100644 --- a/internal/compact/compressor.go +++ b/internal/compact/compressor.go @@ -21,19 +21,22 @@ type Compressor interface { // RuleCompressor 规则压缩器(L2+L3) type RuleCompressor struct { - config *Config + config *Config + summarizer *LLMSummarizer // 新增:LLM摘要器,可为nil(兼容无LLM客户端的场景) } // NewRuleCompressor 创建规则压缩器 -func NewRuleCompressor(config *Config) *RuleCompressor { - return &RuleCompressor{config: config} +func NewRuleCompressor(config *Config, summarizer *LLMSummarizer) *RuleCompressor { + return &RuleCompressor{config: config, summarizer: summarizer} } -// L1Compress LLM摘要压缩(当前为占位实现,需要SummarizationClient) +// L1Compress LLM摘要压缩 — 使用LLM对低优先级消息做智能摘要 func (rc *RuleCompressor) L1Compress(ctx context.Context, messages []llm.Message, priorities []MessagePriority) ([]llm.Message, error) { - // TODO: 当提供SummarizationClient时,实现LLM摘要压缩 - // 当前返回原messages,不执行L1压缩 - return messages, nil + if rc.summarizer == nil { + // 无LLM摘要器时降级,返回原始消息 + return messages, nil + } + return 
rc.summarizer.Summarize(ctx, messages, priorities) } // L2Compress 规则压缩 - 截断超长tool输出 diff --git a/internal/compact/engine.go b/internal/compact/engine.go index 0f96a4a..415e352 100644 --- a/internal/compact/engine.go +++ b/internal/compact/engine.go @@ -15,19 +15,27 @@ type Engine struct { tokenizer Tokenizer priorityCalc *PriorityCalculator ruleComp *RuleCompressor + summarizer *LLMSummarizer // 新增:LLM摘要器 } // NewEngine 创建压缩引擎 -func NewEngine(config *Config) (*Engine, error) { +func NewEngine(config *Config, summarizationClient SummarizationClient) (*Engine, error) { if err := config.Validate(); err != nil { return nil, fmt.Errorf("invalid compact config: %w", err) } + // 创建LLM摘要器(如果提供了客户端) + var summarizer *LLMSummarizer + if summarizationClient != nil { + summarizer = NewLLMSummarizer(summarizationClient, config) + } + return &Engine{ config: config, tokenizer: GetGlobalTokenizer(), priorityCalc: NewPriorityCalculator(DefaultPriorityWeights), - ruleComp: NewRuleCompressor(config), + ruleComp: NewRuleCompressor(config, summarizer), + summarizer: summarizer, }, nil } @@ -140,9 +148,17 @@ func (e *Engine) compressBalanced( ) ([]llm.Message, []string) { current := messages - // L1: 尝试摘要压缩 - if originalTokens > e.config.L1Threshold { - // TODO: 当有SummarizationClient时调用 + // L1: 尝试LLM摘要压缩 + if originalTokens > e.config.L1Threshold && e.summarizer != nil { + compressed, err := e.ruleComp.L1Compress(context.Background(), current, priorities) + if err != nil { + stats = append(stats, "L1: Failed - "+err.Error()) + } else { + current = compressed + tokens, _ := e.CountTokens(current) + stats = append(stats, fmt.Sprintf("L1: LLM summarization applied (%d tokens)", tokens)) + } + } else if originalTokens > e.config.L1Threshold { stats = append(stats, "L1: Skipped (no summarization client)") } @@ -172,8 +188,17 @@ func (e *Engine) compressAggressive( ) ([]llm.Message, []string) { current := messages - // L1: 尝试摘要 - if originalTokens > e.config.L1Threshold { + // L1: 尝试LLM摘要 
+	if originalTokens > e.config.L1Threshold && e.summarizer != nil {
+		compressed, err := e.ruleComp.L1Compress(context.Background(), current, priorities)
+		if err != nil {
+			stats = append(stats, "L1: Failed - "+err.Error())
+		} else {
+			current = compressed
+			tokens, _ := e.CountTokens(current)
+			stats = append(stats, fmt.Sprintf("L1: LLM summarization applied (%d tokens)", tokens))
+		}
+	} else if originalTokens > e.config.L1Threshold {
 		stats = append(stats, "L1: Skipped (no summarization client)")
 	}
 
diff --git a/internal/compact/summarizer.go b/internal/compact/summarizer.go
new file mode 100644
index 0000000..6ec53de
--- /dev/null
+++ b/internal/compact/summarizer.go
@@ -0,0 +1,339 @@
+package compact
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strings"
+	"sync"
+
+	"codeactor/internal/llm"
+)
+
+// defaultSummarizationPrompt is the default system prompt sent to the
+// summarization model. It is written in English to match the agent prompts.
+const defaultSummarizationPrompt = `# Role
+You are a **Conversation Summarizer** for an AI-powered coding assistant system. Your task is to compress conversation history without losing any critical context needed for ongoing development work.
+
+# Task
+Extract the following from the provided conversation fragment:
+
+1. **Task Progress**: What tasks have been completed? What is currently in progress?
+2. **Key Decisions**: What important architectural or design decisions were made? Why?
+3. **Code Changes**: Which files were modified? What are the key code patterns introduced?
+4. **Errors & Fixes**: What problems were encountered? How were they resolved?
+5. **Critical Discoveries**: Important facts about the codebase — file structure, dependencies, tech stack, conventions, etc.
+
+# Rules
+- **Preserve Identifiers**: Retain ALL specific identifiers — file names, function names, class names, variable names, paths.
+- **Preserve Error Details**: Keep concrete error messages and their corresponding fix strategies verbatim.
+- **Ignore Redundancy**: Skip duplicated tool output content; keep only the meaningful results.
+- **Be Complete**: Do NOT omit any context that could be useful for continuing the work.
+- **Be Concise**: Summarize efficiently; prefer bullet points over verbose prose.
+
+# Output Format
+- Use clear, structured Markdown.
+- Output in **English**.
+- Organize extracted information under the 5 categories listed above.`
+
+// SummarizationClient, the interface used to call the summarization LLM, is
+// declared in compact_types.go.
+
+// LLMSummarizer compresses conversation history by replacing low-priority
+// messages with LLM-generated summaries.
+type LLMSummarizer struct {
+	client SummarizationClient
+	config *Config
+}
+
+// NewLLMSummarizer returns an LLMSummarizer that summarizes through client and
+// takes its settings from config.
+func NewLLMSummarizer(client SummarizationClient, config *Config) *LLMSummarizer {
+	return &LLMSummarizer{
+		client: client,
+		config: config,
+	}
+}
+
+// Summarize replaces the compressible part of messages with a single system
+// message that holds LLM-generated summaries.
+//
+// priorities must contain exactly one entry per message, in the same order.
+// Messages flagged as system, user or recent are kept verbatim, early messages
+// at index 0 and len(messages)/3-1 are kept as context anchors, and everything
+// else is summarized in batches.
+//
+// The result is laid out as [leading system message] + [summary message] +
+// [kept messages]. messages is returned unchanged when the summarizer has no
+// client or nothing can be summarized. On error the original messages are
+// returned together with the error.
+func (s *LLMSummarizer) Summarize(
+	ctx context.Context,
+	messages []llm.Message,
+	priorities []MessagePriority,
+) ([]llm.Message, error) {
+	if s.client == nil {
+		return messages, nil
+	}
+
+	// messages is indexed by priority index below, so both slices must line
+	// up; a shorter priorities slice would also silently drop messages.
+	if len(priorities) != len(messages) {
+		return messages, fmt.Errorf("summarize: got %d priorities for %d messages",
+			len(priorities), len(messages))
+	}
+
+	// buildResult always re-inserts a leading system message at the head of
+	// the result, so it must not be collected into the keep region as well.
+	hasLeadingSystem := len(messages) > 0 && messages[0].Role == llm.RoleSystem
+
+	// 1. Partition the messages into a keep region and a summary region.
+	keepRegion := make([]llm.Message, 0, len(messages))
+	summaryRegion := make([]llm.Message, 0, len(messages))
+
+	for i, p := range priorities {
+		if i == 0 && hasLeadingSystem {
+			continue
+		}
+		msg := messages[i]
+
+		// Messages that are always kept verbatim.
+		if p.IsSystem || p.IsUser || p.IsRecent {
+			keepRegion = append(keepRegion, msg)
+			continue
+		}
+
+		// Keep two early messages as context anchors.
+		if p.IsEarly && (i == 0 || i == len(messages)/3-1) {
+			keepRegion = append(keepRegion, msg)
+			continue
+		}
+
+		// Everything else goes to the summary region.
+		summaryRegion = append(summaryRegion, msg)
+	}
+
+	// Nothing to summarize: return the input untouched.
+	if len(summaryRegion) == 0 {
+		slog.Debug("LLM summarizer: no messages to summarize")
+		return messages, nil
+	}
+
+	slog.Info("LLM summarizer: summarizing messages",
+		"total_messages", len(messages),
+		"keep_region", len(keepRegion),
+		"summary_region", len(summaryRegion))
+
+	// 2. Split the summary region into batches bounded by the token limit.
+	batches := s.segmentMessages(summaryRegion)
+
+	// 3. Summarize all batches concurrently. Every goroutine writes only its
+	// own slot of summaryResults, so only firstErr needs the mutex.
+	summaryResults := make([]string, len(batches))
+	var (
+		wg       sync.WaitGroup
+		errMu    sync.Mutex
+		firstErr error
+	)
+
+	for i, batch := range batches {
+		wg.Add(1)
+		go func(idx int, batchMsgs []llm.Message) {
+			defer wg.Done()
+
+			// Bound each call by the configured timeout. A non-positive
+			// timeout would produce an already expired context, so the
+			// caller's context is used unchanged in that case.
+			sumCtx := ctx
+			if timeout := s.config.SummarizationTimeout; timeout > 0 {
+				var cancel context.CancelFunc
+				sumCtx, cancel = context.WithTimeout(ctx, timeout)
+				defer cancel()
+			}
+
+			summary, err := s.client.GenerateSummary(sumCtx, batchMsgs)
+			if err != nil {
+				errMu.Lock()
+				if firstErr == nil {
+					firstErr = fmt.Errorf("batch %d summarization failed: %w", idx, err)
+				}
+				errMu.Unlock()
+				return
+			}
+			summaryResults[idx] = summary
+		}(i, batch)
+	}
+
+	wg.Wait()
+
+	if firstErr != nil {
+		slog.Warn("LLM summarization partially failed", "error", firstErr)
+		// Partial failure: continue with the summaries that did succeed.
+		// NOTE(review): messages of failed batches are neither kept nor
+		// summarized, so their content is lost — confirm this is intended.
+		validSummaries := make([]string, 0, len(summaryResults))
+		for _, text := range summaryResults {
+			if text != "" {
+				validSummaries = append(validSummaries, text)
+			}
+		}
+		if len(validSummaries) == 0 {
+			return messages, fmt.Errorf("all summarization batches failed: %w", firstErr)
+		}
+		summaryResults = validSummaries
+	}
+
+	// 4. Merge all summaries into the body of a single system message.
+	// NOTE(review): the summarization prompt itself is prepended to the
+	// merged text and so ends up in the compressed context — confirm this
+	// is intended.
+	summaryPrompt := s.config.SummarizationPrompt
+	if summaryPrompt == "" {
+		summaryPrompt = defaultSummarizationPrompt
+	}
+
+	var fullSummary strings.Builder
+	fullSummary.WriteString(summaryPrompt + "\n\n---对话摘要---\n\n")
+	for i, summary := range summaryResults {
+		fmt.Fprintf(&fullSummary, "## 摘要段 %d\n%s\n\n", i+1, summary)
+	}
+
+	// 5. Assemble [leading system message] + [summary message] + [kept messages].
+	result := s.buildResult(messages, keepRegion, fullSummary.String())
+
+	slog.Info("LLM summarization completed",
+		"original_messages", len(messages),
+		"result_messages", len(result),
+		"summaries_generated", len(summaryResults))
+
+	return result, nil
+}
+
+// calculateThreshold returns the score of the entry located 70% of the way
+// through priorities, or 5.0 when priorities is empty.
+//
+// NOTE(review): priorities is not sorted here, so the value is a percentile
+// only if the caller passes the entries ordered by score.
+func (s *LLMSummarizer) calculateThreshold(priorities []MessagePriority) float64 {
+	if len(priorities) == 0 {
+		return 5.0
+	}
+	// len*7/10 is always below len for a non-empty slice, so the index needs
+	// no clamping.
+	return priorities[len(priorities)*7/10].Score
+}
+
+// segmentMessages splits messages into consecutive batches whose estimated
+// token count stays within config.SummarizationMaxInputTokens (8000 when that
+// value is not positive). Messages are never split: one that exceeds the limit
+// on its own starts a batch that is closed by the next message. It returns nil
+// for empty input.
+func (s *LLMSummarizer) segmentMessages(messages []llm.Message) [][]llm.Message {
+	if len(messages) == 0 {
+		return nil
+	}
+
+	maxTokens := s.config.SummarizationMaxInputTokens
+	if maxTokens <= 0 {
+		maxTokens = 8000 // default limit
+	}
+
+	var (
+		batches       [][]llm.Message
+		currentBatch  []llm.Message
+		currentTokens int
+	)
+
+	for _, msg := range messages {
+		// Rough estimate: about four characters per token.
+		msgTokens := len([]rune(msg.Content)) / 4
+
+		// Close the current batch when this message would push it over the
+		// limit. An empty batch accepts any message, so an oversized message
+		// still lands in a batch instead of being dropped.
+		if len(currentBatch) > 0 && currentTokens+msgTokens > maxTokens {
+			batches = append(batches, currentBatch)
+			currentBatch = nil
+			currentTokens = 0
+		}
+		currentBatch = append(currentBatch, msg)
+		currentTokens += msgTokens
+	}
+
+	// Flush the trailing batch; it is never empty because messages is not.
+	return append(batches, currentBatch)
+}
+
+// buildResult assembles the compressed message list in the order
+// [leading system message] + [summary system message] + [kept messages].
+// The leading system message is originalMessages[0] when that message has the
+// system role; an empty summary adds no summary message.
+func (s *LLMSummarizer) buildResult(
+	originalMessages []llm.Message,
+	keepRegion []llm.Message,
+	summary string,
+) []llm.Message {
+	result := make([]llm.Message, 0, len(keepRegion)+2)
+
+	// Always carry over the original leading system message, if there is one.
+	if len(originalMessages) > 0 && originalMessages[0].Role == llm.RoleSystem {
+		result = append(result, originalMessages[0])
+	}
+
+	// The summary is injected as a system message.
+	if summary != "" {
+		result = append(result, llm.Message{
+			Role:    llm.RoleSystem,
+			Content: "[CONTEXT SUMMARY]\n" + summary,
+		})
+	}
+
+	return append(result, keepRegion...)
+}
+
+// ─────────────────────────────────────────────────────────
+// Adapter: exposes an llm.Engine as a SummarizationClient
+// ─────────────────────────────────────────────────────────
+
+// SummaryAdapter adapts an llm.Engine to the SummarizationClient interface.
+//
+// NOTE(review): Model is not read by GenerateSummary — confirm that the
+// engine itself selects the model.
+type SummaryAdapter struct {
+	LLM         llm.Engine
+	Model       string
+	Temperature float64
+	MaxTokens   int
+}
+
+// GenerateSummary implements SummarizationClient. It sends
+// defaultSummarizationPrompt as the system message, followed by messages, and
+// returns the content of the first choice of the response.
+func (a *SummaryAdapter) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) {
+	if a.LLM == nil {
+		return "", fmt.Errorf("summarization failed: no LLM engine configured")
+	}
+
+	// Build the request: system prompt first, then the batch to summarize.
+	allMessages := make([]llm.Message, 0, len(messages)+1)
+	allMessages = append(allMessages, llm.Message{
+		Role:    llm.RoleSystem,
+		Content: defaultSummarizationPrompt,
+	})
+	allMessages = append(allMessages, messages...)
+
+	opts := &llm.CallOptions{
+		MaxTokens:   a.MaxTokens,
+		Temperature: a.Temperature,
+	}
+
+	resp, err := a.LLM.GenerateContent(ctx, allMessages, nil, opts)
+	if err != nil {
+		return "", fmt.Errorf("summarization failed: %w", err)
+	}
+	if resp == nil || len(resp.Choices) == 0 {
+		return "", fmt.Errorf("summarization returned empty response")
+	}
+
+	return resp.Choices[0].Content, nil
+}
diff --git a/internal/config/config.go b/internal/config/config.go
index 05f12db..94b2e76 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -106,8 +106,8 @@ type Config struct {
 	Compact ContextCompactConfig `toml:"context"` // [context] - 上下文压缩配置
 }
 
-// getProvider returns a provider config by name from the shared provider pool.
-func (c *Config) getProvider(name string) (*ProviderConfig, error) { +// GetProvider returns a provider config by name from the shared provider pool. +func (c *Config) GetProvider(name string) (*ProviderConfig, error) { if name == "" { return nil, fmt.Errorf("empty provider name") } @@ -196,20 +196,20 @@ func (c *Config) ResolveProvider(agentName, toolName string) (*ProviderConfig, e // 1-2. Tool-level override (highest priority) if toolName != "" { if name := c.resolveToolProvider(toolName); name != "" { - return c.getProvider(name) + return c.GetProvider(name) } } // 3-4. Agent-level override if agentName != "" { if name := c.resolveAgentProvider(agentName); name != "" { - return c.getProvider(name) + return c.GetProvider(name) } } // 5. Global override if c.Global.LLM != nil && c.Global.LLM.UseProvider != "" { - return c.getProvider(c.Global.LLM.UseProvider) + return c.GetProvider(c.Global.LLM.UseProvider) } // 6. No provider configured @@ -309,7 +309,7 @@ func (c *Config) validate() error { return fmt.Errorf("no providers configured in LLM section") } - activeProvider, err := c.getProvider(effectiveProvider) + activeProvider, err := c.GetProvider(effectiveProvider) if err != nil { return err } @@ -354,6 +354,10 @@ type ContextCompactConfig struct { // SummarizationModel 用于L1摘要的轻量模型 SummarizationModel string `toml:"summarization_model"` + // SummarizationProvider 用于L1摘要的LLM provider名称(可选,指向 providers 中定义的 provider) + // 为空则复用主 agent 的 LLM 引擎 + SummarizationProvider string `toml:"summarization_provider"` + // L1Threshold 触发L1压缩的阈值 L1Threshold int `toml:"l1_token_threshold"` @@ -371,4 +375,7 @@ type ContextCompactConfig struct { // KeepTaskConclusions 保留已完成任务的结论数 KeepTaskConclusions int `toml:"keep_task_conclusions"` + + // SummarizationMaxInputTokens 摘要时单批次最大输入token数 + SummarizationMaxInputTokens int `toml:"summarization_max_input_tokens"` } diff --git a/internal/llm/llm.go b/internal/llm/llm.go index 936a31a..5c970ef 100644 --- a/internal/llm/llm.go +++ 
b/internal/llm/llm.go @@ -65,6 +65,11 @@ type LoggingEngine struct { inner Engine } +// NewLoggingEngine creates a LoggingEngine that wraps the given inner engine +func NewLoggingEngine(inner Engine) *LoggingEngine { + return &LoggingEngine{inner: inner} +} + func (l *LoggingEngine) GenerateContent(ctx context.Context, messages []Message, tools []ToolDef, opts *CallOptions) (*Response, error) { if msgsJSON, err := json.MarshalIndent(messages, "", " "); err == nil { LogLLMContent("LLM Input (messages)", string(msgsJSON))