iohub · iohub · May 6, 2026 · May 6, 2026
diff --git a/config/config.toml b/config/config.toml
@@ -164,6 +164,10 @@ enable_auto_compact = true
 # 用于L1摘要的轻量模型 (可选，不配置则跳过L1)
 # summarization_model = "gpt-3.5-turbo"
 
+# 用于L1摘要的provider（可选，指向 [global.llm.providers] 中定义的 provider name）
+# 不配置则复用主 agent 的 LLM 引擎
+# summarization_provider = "siliconflow"
+
 # 触发各级压缩的阈值
 l1_token_threshold = 160000
 l2_token_threshold = 130000

diff --git a/internal/agents/conductor.go b/internal/agents/conductor.go
@@ -66,6 +66,7 @@ type ConductorAgent struct {
 	customAgents   map[string]*CustomAgent   // delegate_<name> → agent design
 	compactEngine  *compact.Engine           // 上下文压缩引擎
 	compactConfig  *compact.Config           // 压缩配置
+	summaryEngine  llm.Engine                // 独立的摘要 LLM 引擎（nil 则复用主引擎）
 }
 
 // loadProjectContext 读取工作区目录下的项目上下文文件（CODEACTOR.md、CLAUDE.md、AGENTS.md），
@@ -97,7 +98,7 @@ func (a *ConductorAgent) loadProjectContext() *ProjectContextLoadResult {
 	return result
 }
 
-func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config) *ConductorAgent {
+func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config, summaryEngine llm.Engine) *ConductorAgent {
 	// self-reference for closures that need the ConductorAgent after construction
 	var self *ConductorAgent
 	delegateRepo := tools.NewAdapter("delegate_repo", "Delegate analysis task to Repo-Agent", func(ctx context.Context, params map[string]interface{}) (interface{}, error) {
@@ -331,6 +332,7 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *
 		customAgents:   make(map[string]*CustomAgent),
 		compactEngine:  nil, // 将在 Run 方法中根据配置初始化
 		compactConfig:  compactCfg,
+		summaryEngine:  summaryEngine,
 	}
 	return self
 }
@@ -627,6 +629,20 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv
 		}
 	}
 
+	// ═══════ 初始化上下文压缩引擎 ═══════
+	if a.compactEngine == nil && a.compactConfig != nil && a.compactConfig.EnableAutoCompact {
+		summaryClient := a.createSummaryClient()
+		engine, err := compact.NewEngine(a.compactConfig, summaryClient)
+		if err != nil {
+			slog.Warn("Failed to create compact engine", "error", err)
+		} else {
+			a.compactEngine = engine
+			slog.Info("Context compact engine initialized",
+				"strategy", a.compactConfig.Strategy.String(),
+				"max_tokens", a.compactConfig.MaxContextTokens)
+		}
+	}
+
 	var messages []llm.Message
 
 	// Always start with System Prompt (with any registered custom agents appended)
@@ -805,3 +821,76 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv
 
 	return "", fmt.Errorf("ConductorAgent exceeded max steps")
 }
+
+// createSummaryClient builds the SummarizationClient handed to the compact engine.
+// It wraps a.summaryEngine when one is set and falls back to the agent's own a.LLM otherwise.
+func (a *ConductorAgent) createSummaryClient() compact.SummarizationClient {
+	adapter := &summaryClientAdapter{
+		LLM:         a.LLM,
+		Model:       a.compactConfig.SummarizationModel,
+		Temperature: 0.1,  // low temperature keeps summaries consistent
+		MaxTokens:   2000, // cap on the summary output length
+	}
+	if a.summaryEngine != nil {
+		adapter.LLM = a.summaryEngine
+	}
+	return adapter
+}
+
+// summaryClientAdapter adapts an llm.Engine to the compact.SummarizationClient interface.
+type summaryClientAdapter struct {
+	LLM         llm.Engine // engine that receives the summarization request
+	Model       string // NOTE(review): set by createSummaryClient but never read by GenerateSummary — confirm intent
+	Temperature float64 // forwarded as llm.CallOptions.Temperature
+	MaxTokens   int // forwarded as llm.CallOptions.MaxTokens
+}
+
+// GenerateSummary sends the summarization system prompt followed by messages to
+// the wrapped engine and returns the content of the first choice in the reply.
+// An engine failure is wrapped and returned; a reply without choices is an error.
+func (s *summaryClientAdapter) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) {
+	// System prompt first, then the messages to summarize.
+	prompt := make([]llm.Message, 0, len(messages)+1)
+	prompt = append(prompt, llm.Message{Role: llm.RoleSystem, Content: getSummarizationPrompt()})
+	prompt = append(prompt, messages...)
+
+	resp, err := s.LLM.GenerateContent(ctx, prompt, nil, &llm.CallOptions{
+		MaxTokens:   s.MaxTokens,
+		Temperature: s.Temperature,
+	})
+	// resp is only inspected once the call is known to have succeeded.
+	switch {
+	case err != nil:
+		return "", fmt.Errorf("summarization failed: %w", err)
+	case len(resp.Choices) == 0:
+		return "", fmt.Errorf("summarization returned empty response")
+	}
+	return resp.Choices[0].Content, nil
+}
+
+// getSummarizationPrompt returns the default summarization system prompt (English version).
+func getSummarizationPrompt() string {
+	return `# Role
+You are a **Conversation Summarizer** for an AI-powered coding assistant system. Your task is to compress conversation history without losing any critical context needed for ongoing development work.
+
+# Task
+Extract the following from the provided conversation fragment:
+
+1. **Task Progress**: What tasks have been completed? What is currently in progress?
+2. **Key Decisions**: What important architectural or design decisions were made? Why?
+3. **Code Changes**: Which files were modified? What are the key code patterns introduced?
+4. **Errors & Fixes**: What problems were encountered? How were they resolved?
+5. **Critical Discoveries**: Important facts about the codebase — file structure, dependencies, tech stack, conventions, etc.
+
+# Rules
+- **Preserve Identifiers**: Retain ALL specific identifiers — file names, function names, class names, variable names, paths.
+- **Preserve Error Details**: Keep concrete error messages and their corresponding fix strategies verbatim.
+- **Ignore Redundancy**: Skip duplicated tool output content; keep only the meaningful results.
+- **Be Complete**: Do NOT omit any context that could be useful for continuing the work.
+- **Be Concise**: Summarize efficiently; prefer bullet points over verbose prose.
+
+# Output Format
+- Use clear, structured Markdown.
+- Output in **English**.
+- Organize extracted information under the 5 categories listed above.`
+}
diff --git a/internal/agents/conductor_test.go b/internal/agents/conductor_test.go
@@ -55,7 +55,7 @@ func newTestConductorAgent(t *testing.T, workDir string) *ConductorAgent {
 	t.Helper()
 	gctx := newTestGlobalCtx(workDir)
 	engine := &mockEngine{}
-	return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 }
 
 // makeMetaOutput builds a valid Meta-Agent JSON output string.
@@ -351,7 +351,7 @@ func TestCustomAgentDelegateTool_Execution(t *testing.T) {
 	}
 
 	// Build conductor with mocked LLM
-	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 
 	ca := &CustomAgent{
 		Name:         "test_executor",
@@ -414,7 +414,7 @@ func TestCustomAgentDelegateTool_FinishTerminates(t *testing.T) {
 		},
 	}
 
-	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 
 	ca := &CustomAgent{
 		Name:         "finisher",
@@ -545,7 +545,7 @@ func TestDelegateMeta_DynamicRegistration(t *testing.T) {
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
 
 	// ConductorAgent
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 	initialAdapterCount := len(conductor.Adapters)
 
 	// Find and call delegate_meta tool
@@ -621,7 +621,7 @@ func TestDelegateMeta_DuplicateRegistrationPrevented(t *testing.T) {
 	)
 
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	// Call delegate_meta twice with the same agent design
 	var delegateMeta *tools.Adapter
@@ -661,7 +661,7 @@ func TestDelegateMeta_ParseFailure_ReturnsRawOutput(t *testing.T) {
 	// Meta-Agent returns malformed output (no execution_result block)
 	malformedOutput := "Just some plain text without structured blocks."
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(malformedOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {
@@ -704,7 +704,7 @@ func TestDelegateMeta_EmptyAgentName_NoRegistration(t *testing.T) {
 		[]string{"read_file"},
 	)
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {
@@ -734,7 +734,7 @@ func TestDelegateMeta_NoAgentDesign_NoRegistration(t *testing.T) {
 	output := `{"thinking": "designing...", "agent_name": "Test Agent", "tools_used": ["read_file"], "result": {"key": "value"}}`
 
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(output))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {

diff --git a/internal/app/app.go b/internal/app/app.go
@@ -159,23 +159,35 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) {
 	devopsAgent := agents.NewDevOpsAgent(ca.globalCtx, devopsEngine, devopsMaxSteps)
 	// 构建 compact config
 	var compactCfg *compact.Config
+	var summaryEngine llm.Engine
 	if ca.config != nil {
 		c := &ca.config.Compact
 		compactCfg = compact.ConfigFrom(
 			c.MaxContextTokens,
 			c.Strategy,
 			c.EnableAutoCompact,
 			c.SummarizationModel,
+			c.SummarizationProvider,
 			c.L1Threshold,
 			c.L2Threshold,
 			c.L3Threshold,
 			c.SummarizationTimeout,
 			c.KeepRecentRounds,
 			c.KeepTaskConclusions,
+			c.SummarizationMaxInputTokens,
 		)
+
+		// 为 compact 摘要创建独立的 LLM 引擎（如果配置了 summarization_provider）
+		if c.SummarizationProvider != "" {
+			provider, err := ca.config.GetProvider(c.SummarizationProvider)
+			if err == nil {
+				summaryEngine = llm.NewOpenAIEngine(provider.APIBaseURL, provider.APIKey, provider.Model)
+				summaryEngine = llm.NewLoggingEngine(summaryEngine)
+			}
+		}
 	}
 
-	ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg)
+	ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg, summaryEngine)
 }
 
 func (ca *CodingAssistant) IntegrateMessaging(dispatcher *messaging.MessageDispatcher) {

diff --git a/internal/compact/compact_config.go b/internal/compact/compact_config.go
@@ -20,6 +20,9 @@ type Config struct {
 	// SummarizationModel 用于L1摘要的轻量模型
 	SummarizationModel string `toml:"summarization_model"`
 
+	// SummarizationProvider 摘要使用的 LLM provider 名称
+	SummarizationProvider string `toml:"summarization_provider"`
+
 	// L1Threshold 触发L1压缩的阈值
 	L1Threshold int `toml:"l1_token_threshold"`
 
@@ -37,20 +40,27 @@ type Config struct {
 
 	// KeepTaskConclusions 保留已完成任务的结论数
 	KeepTaskConclusions int `toml:"keep_task_conclusions"`
+
+	// SummarizationMaxInputTokens 摘要时单批次最大输入token数
+	SummarizationMaxInputTokens int `toml:"summarization_max_input_tokens"`
+
+	// SummarizationPrompt 自定义摘要提示词（可选，空则用默认）
+	SummarizationPrompt string `toml:"summarization_prompt"`
 }
 
 // DefaultConfig 默认配置
 var DefaultConfig = Config{
-	MaxContextTokens:     198000, // 198k
-	Strategy:             StrategyBalanced,
-	EnableAutoCompact:    true,
-	SummarizationModel:   "gpt-3.5-turbo", // 或claude-3-haiku
-	L1Threshold:          160000,
-	L2Threshold:          130000,
-	L3Threshold:          100000,
-	SummarizationTimeout: 15 * time.Second,
-	KeepRecentRounds:     3, // 保留最近3轮完整对话
-	KeepTaskConclusions:  2, // 保留最近2个已完成任务的结论
+	MaxContextTokens:            198000, // 198k
+	Strategy:                    StrategyBalanced,
+	EnableAutoCompact:           true,
+	SummarizationModel:          "gpt-3.5-turbo", // or claude-3-haiku
+	L1Threshold:                 160000,
+	L2Threshold:                 130000,
+	L3Threshold:                 100000,
+	SummarizationTimeout:        15 * time.Second,
+	KeepRecentRounds:            3,    // keep the 3 most recent full conversation rounds
+	KeepTaskConclusions:         2,    // keep the conclusions of the 2 most recent completed tasks
+	SummarizationMaxInputTokens: 8000, // max input tokens per summarization batch
 }
 
 func (c *Config) Validate() error {
@@ -68,19 +78,21 @@ func (c *Config) Validate() error {
 
 // ConfigFrom 从外部配置结构创建 compact.Config
 // 用于打破 config -> compact -> llm -> config 的循环依赖
-func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string,
-	l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions int) *Config {
+func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string, summarizationProvider string,
+	l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions, summaryMaxInputTokens int) *Config {
 	return &Config{
-		MaxContextTokens:     maxTokens,
-		Strategy:             parseStrategy(strategyStr),
-		EnableAutoCompact:    enableAuto,
-		SummarizationModel:   model,
-		L1Threshold:          l1,
-		L2Threshold:          l2,
-		L3Threshold:          l3,
-		SummarizationTimeout: time.Duration(timeoutSec) * time.Second,
-		KeepRecentRounds:     keepRounds,
-		KeepTaskConclusions:  keepConclusions,
+		MaxContextTokens:            maxTokens,
+		Strategy:                    parseStrategy(strategyStr),
+		EnableAutoCompact:           enableAuto,
+		SummarizationModel:          model,
+		SummarizationProvider:       summarizationProvider,
+		L1Threshold:                 l1,
+		L2Threshold:                 l2,
+		L3Threshold:                 l3,
+		SummarizationTimeout:        time.Duration(timeoutSec) * time.Second, // timeoutSec is a count of whole seconds
+		KeepRecentRounds:            keepRounds,
+		KeepTaskConclusions:         keepConclusions,
+		SummarizationMaxInputTokens: summaryMaxInputTokens, // NOTE(review): SummarizationPrompt is left at its zero value here — confirm intended
 	}
 }