From 8920d33a7a5b5f0fc05ef7c87ade78edb18bda87 Mon Sep 17 00:00:00 2001
From: iohub <rickyang.pro@gmail.com>
Date: Thu, 7 May 2026 06:51:06 +0800
Subject: [PATCH] feat: add summarization_provider config for compact component
 and convert prompts to English
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Convert defaultSummarizationPrompt from Chinese to English with structured
  Markdown format (Role → Task → Rules → Output Format) matching agent prompt style
- Add summarization_provider field to [context] config section, allowing compact
  summarization to use an independent LLM provider from global.llm.providers
- Extend ContextCompactConfig, compact.Config, and ConfigFrom with
  SummarizationProvider field
- Create independent summaryEngine in app.go when summarization_provider is set,
  falling back to main engine for backward compatibility
- Convert getSummarizationPrompt() in conductor.go from Chinese to English
- Export GetProvider in config package and add NewLoggingEngine factory in llm package
- Update all NewConductorAgent call sites in tests to pass nil summaryEngine
---
 config/config.toml                 |   4 +
 internal/agents/conductor.go       |  91 ++++++-
 internal/agents/conductor_test.go  |  16 +-
 internal/app/app.go                |  14 +-
 internal/compact/compact_config.go |  56 +++--
 internal/compact/compact_test.go   | 384 ++++++++++++++++++++++++++---
 internal/compact/compact_types.go  |   7 +
 internal/compact/compressor.go     |  17 +-
 internal/compact/engine.go         |  39 ++-
 internal/compact/summarizer.go     | 306 +++++++++++++++++++++++
 internal/config/config.go          |  19 +-
 internal/llm/llm.go                |   5 +
 12 files changed, 872 insertions(+), 86 deletions(-)
 create mode 100644 internal/compact/summarizer.go
diff --git a/config/config.toml b/config/config.toml
index 79f0319..e302c6a 100644
--- a/config/config.toml
+++ b/config/config.toml
@@ -164,6 +164,10 @@ enable_auto_compact = true
 # 用于L1摘要的轻量模型 (可选，不配置则跳过L1)
 # summarization_model = "gpt-3.5-turbo"
 
+# 用于L1摘要的provider（可选，指向 [global.llm.providers] 中定义的 provider name）
+# 不配置则复用主 agent 的 LLM 引擎
+# summarization_provider = "siliconflow"
+
 # 触发各级压缩的阈值
 l1_token_threshold = 160000
 l2_token_threshold = 130000
diff --git a/internal/agents/conductor.go b/internal/agents/conductor.go
index 4d64ab5..54e253b 100644
--- a/internal/agents/conductor.go
+++ b/internal/agents/conductor.go
@@ -66,6 +66,7 @@ type ConductorAgent struct {
 	customAgents   map[string]*CustomAgent   // delegate_<name> → agent design
 	compactEngine  *compact.Engine           // 上下文压缩引擎
 	compactConfig  *compact.Config           // 压缩配置
+	summaryEngine  llm.Engine                // 独立的摘要 LLM 引擎（nil 则复用主引擎）
 }
 
 // loadProjectContext 读取工作区目录下的项目上下文文件（CODEACTOR.md、CLAUDE.md、AGENTS.md），
@@ -97,7 +98,7 @@ func (a *ConductorAgent) loadProjectContext() *ProjectContextLoadResult {
 	return result
 }
 
-func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config) *ConductorAgent {
+func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int, compactCfg *compact.Config, summaryEngine llm.Engine) *ConductorAgent {
 	// self-reference for closures that need the ConductorAgent after construction
 	var self *ConductorAgent
 	delegateRepo := tools.NewAdapter("delegate_repo", "Delegate analysis task to Repo-Agent", func(ctx context.Context, params map[string]interface{}) (interface{}, error) {
@@ -331,6 +332,7 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *
 		customAgents:   make(map[string]*CustomAgent),
 		compactEngine:  nil, // 将在 Run 方法中根据配置初始化
 		compactConfig:  compactCfg,
+		summaryEngine:  summaryEngine,
 	}
 	return self
 }
@@ -627,6 +629,20 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv
 		}
 	}
 
+	// ═══════ 初始化上下文压缩引擎 ═══════
+	if a.compactEngine == nil && a.compactConfig != nil && a.compactConfig.EnableAutoCompact {
+		summaryClient := a.createSummaryClient()
+		engine, err := compact.NewEngine(a.compactConfig, summaryClient)
+		if err != nil {
+			slog.Warn("Failed to create compact engine", "error", err)
+		} else {
+			a.compactEngine = engine
+			slog.Info("Context compact engine initialized",
+				"strategy", a.compactConfig.Strategy.String(),
+				"max_tokens", a.compactConfig.MaxContextTokens)
+		}
+	}
+
 	var messages []llm.Message
 
 	// Always start with System Prompt (with any registered custom agents appended)
@@ -805,3 +821,76 @@ func (a *ConductorAgent) Run(ctx context.Context, input string, mem *memory.Conv
 
 	return "", fmt.Errorf("ConductorAgent exceeded max steps")
 }
+
+// createSummaryClient builds the lightweight LLM client used for context summarization.
+// A configured standalone summaryEngine takes precedence; otherwise the main engine (a.LLM) is reused.
+func (a *ConductorAgent) createSummaryClient() compact.SummarizationClient {
+	engine := a.LLM
+	if a.summaryEngine != nil {
+		engine = a.summaryEngine
+	}
+	return &summaryClientAdapter{
+		LLM:         engine,
+		Model:       a.compactConfig.SummarizationModel,
+		Temperature: 0.1, // low temperature for consistent summaries
+		MaxTokens:   2000, // cap on summary output length
+	}
+}
+
+// summaryClientAdapter adapts an llm.Engine to the compact.SummarizationClient interface.
+type summaryClientAdapter struct {
+	LLM         llm.Engine
+	Model       string // NOTE(review): GenerateSummary does not read this field — confirm whether the model should be forwarded
+	Temperature float64
+	MaxTokens   int
+}
+
+func (s *summaryClientAdapter) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) {
+	// Build the request: the summarization system prompt followed by the messages to summarize.
+	allMessages := append([]llm.Message{
+		{
+			Role:    llm.RoleSystem,
+			Content: getSummarizationPrompt(),
+		},
+	}, messages...)
+
+	opts := &llm.CallOptions{
+		MaxTokens:   s.MaxTokens,
+		Temperature: s.Temperature,
+	}
+	resp, err := s.LLM.GenerateContent(ctx, allMessages, nil, opts)
+	if err != nil {
+		return "", fmt.Errorf("summarization failed: %w", err)
+	}
+	if len(resp.Choices) == 0 {
+		return "", fmt.Errorf("summarization returned empty response")
+	}
+	return resp.Choices[0].Content, nil // only the first choice is returned
+}
+
+// getSummarizationPrompt returns the default summarization prompt (English version).
+func getSummarizationPrompt() string {
+	return `# Role
+You are a **Conversation Summarizer** for an AI-powered coding assistant system. Your task is to compress conversation history without losing any critical context needed for ongoing development work.
+
+# Task
+Extract the following from the provided conversation fragment:
+
+1. **Task Progress**: What tasks have been completed? What is currently in progress?
+2. **Key Decisions**: What important architectural or design decisions were made? Why?
+3. **Code Changes**: Which files were modified? What are the key code patterns introduced?
+4. **Errors & Fixes**: What problems were encountered? How were they resolved?
+5. **Critical Discoveries**: Important facts about the codebase — file structure, dependencies, tech stack, conventions, etc.
+
+# Rules
+- **Preserve Identifiers**: Retain ALL specific identifiers — file names, function names, class names, variable names, paths.
+- **Preserve Error Details**: Keep concrete error messages and their corresponding fix strategies verbatim.
+- **Ignore Redundancy**: Skip duplicated tool output content; keep only the meaningful results.
+- **Be Complete**: Do NOT omit any context that could be useful for continuing the work.
+- **Be Concise**: Summarize efficiently; prefer bullet points over verbose prose.
+
+# Output Format
+- Use clear, structured Markdown.
+- Output in **English**.
+- Organize extracted information under the 5 categories listed above.`
+}
diff --git a/internal/agents/conductor_test.go b/internal/agents/conductor_test.go
index 1535268..23bcfe1 100644
--- a/internal/agents/conductor_test.go
+++ b/internal/agents/conductor_test.go
@@ -55,7 +55,7 @@ func newTestConductorAgent(t *testing.T, workDir string) *ConductorAgent {
 	t.Helper()
 	gctx := newTestGlobalCtx(workDir)
 	engine := &mockEngine{}
-	return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 }
 
 // makeMetaOutput builds a valid Meta-Agent JSON output string.
@@ -351,7 +351,7 @@ func TestCustomAgentDelegateTool_Execution(t *testing.T) {
 	}
 
 	// Build conductor with mocked LLM
-	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 
 	ca := &CustomAgent{
 		Name:         "test_executor",
@@ -414,7 +414,7 @@ func TestCustomAgentDelegateTool_FinishTerminates(t *testing.T) {
 		},
 	}
 
-	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3, nil, nil)
 
 	ca := &CustomAgent{
 		Name:         "finisher",
@@ -545,7 +545,7 @@ func TestDelegateMeta_DynamicRegistration(t *testing.T) {
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
 
 	// ConductorAgent
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 	initialAdapterCount := len(conductor.Adapters)
 
 	// Find and call delegate_meta tool
@@ -621,7 +621,7 @@ func TestDelegateMeta_DuplicateRegistrationPrevented(t *testing.T) {
 	)
 
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	// Call delegate_meta twice with the same agent design
 	var delegateMeta *tools.Adapter
@@ -661,7 +661,7 @@ func TestDelegateMeta_ParseFailure_ReturnsRawOutput(t *testing.T) {
 	// Meta-Agent returns malformed output (no execution_result block)
 	malformedOutput := "Just some plain text without structured blocks."
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(malformedOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {
@@ -704,7 +704,7 @@ func TestDelegateMeta_EmptyAgentName_NoRegistration(t *testing.T) {
 		[]string{"read_file"},
 	)
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {
@@ -734,7 +734,7 @@ func TestDelegateMeta_NoAgentDesign_NoRegistration(t *testing.T) {
 	output := `{"thinking": "designing...", "agent_name": "Test Agent", "tools_used": ["read_file"], "result": {"key": "value"}}`
 
 	metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(output))
-	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil)
+	conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3, nil, nil)
 
 	var delegateMeta *tools.Adapter
 	for _, ad := range conductor.Adapters {
diff --git a/internal/app/app.go b/internal/app/app.go
index 512272f..2c41ced 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -159,6 +159,7 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) {
 	devopsAgent := agents.NewDevOpsAgent(ca.globalCtx, devopsEngine, devopsMaxSteps)
 	// 构建 compact config
 	var compactCfg *compact.Config
+	var summaryEngine llm.Engine
 	if ca.config != nil {
 		c := &ca.config.Compact
 		compactCfg = compact.ConfigFrom(
@@ -166,16 +167,27 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) {
 			c.Strategy,
 			c.EnableAutoCompact,
 			c.SummarizationModel,
+			c.SummarizationProvider,
 			c.L1Threshold,
 			c.L2Threshold,
 			c.L3Threshold,
 			c.SummarizationTimeout,
 			c.KeepRecentRounds,
 			c.KeepTaskConclusions,
+			c.SummarizationMaxInputTokens,
 		)
+
+		// 为 compact 摘要创建独立的 LLM 引擎（如果配置了 summarization_provider）
+		if c.SummarizationProvider != "" {
+			provider, err := ca.config.GetProvider(c.SummarizationProvider)
+			if err == nil {
+				summaryEngine = llm.NewOpenAIEngine(provider.APIBaseURL, provider.APIKey, provider.Model)
+				summaryEngine = llm.NewLoggingEngine(summaryEngine)
+			}
+		}
 	}
 
-	ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg)
+	ca.conductor = agents.NewConductorAgent(ca.globalCtx, conductorEngine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount, compactCfg, summaryEngine)
 }
 
 func (ca *CodingAssistant) IntegrateMessaging(dispatcher *messaging.MessageDispatcher) {
diff --git a/internal/compact/compact_config.go b/internal/compact/compact_config.go
index df4a33d..af52fd2 100644
--- a/internal/compact/compact_config.go
+++ b/internal/compact/compact_config.go
@@ -20,6 +20,9 @@ type Config struct {
 	// SummarizationModel 用于L1摘要的轻量模型
 	SummarizationModel string `toml:"summarization_model"`
 
+	// SummarizationProvider 摘要使用的 LLM provider 名称
+	SummarizationProvider string `toml:"summarization_provider"`
+
 	// L1Threshold 触发L1压缩的阈值
 	L1Threshold int `toml:"l1_token_threshold"`
 
@@ -37,20 +40,27 @@ type Config struct {
 
 	// KeepTaskConclusions 保留已完成任务的结论数
 	KeepTaskConclusions int `toml:"keep_task_conclusions"`
+
+	// SummarizationMaxInputTokens 摘要时单批次最大输入token数
+	SummarizationMaxInputTokens int `toml:"summarization_max_input_tokens"`
+
+	// SummarizationPrompt 自定义摘要提示词（可选，空则用默认）
+	SummarizationPrompt string `toml:"summarization_prompt"`
 }
 
 // DefaultConfig 默认配置
 var DefaultConfig = Config{
-	MaxContextTokens:     198000, // 198k
-	Strategy:             StrategyBalanced,
-	EnableAutoCompact:    true,
-	SummarizationModel:   "gpt-3.5-turbo", // 或claude-3-haiku
-	L1Threshold:          160000,
-	L2Threshold:          130000,
-	L3Threshold:          100000,
-	SummarizationTimeout: 15 * time.Second,
-	KeepRecentRounds:     3, // 保留最近3轮完整对话
-	KeepTaskConclusions:  2, // 保留最近2个已完成任务的结论
+	MaxContextTokens:            198000, // 198k
+	Strategy:                    StrategyBalanced,
+	EnableAutoCompact:           true,
+	SummarizationModel:          "gpt-3.5-turbo", // or claude-3-haiku
+	L1Threshold:                 160000,
+	L2Threshold:                 130000,
+	L3Threshold:                 100000,
+	SummarizationTimeout:        15 * time.Second,
+	KeepRecentRounds:            3,    // keep the 3 most recent full conversation rounds
+	KeepTaskConclusions:         2,    // keep the conclusions of the 2 most recently completed tasks
+	SummarizationMaxInputTokens: 8000, // maximum input per summarization batch
 }
 
 func (c *Config) Validate() error {
@@ -68,19 +78,21 @@ func (c *Config) Validate() error {
 
 // ConfigFrom 从外部配置结构创建 compact.Config
 // 用于打破 config -> compact -> llm -> config 的循环依赖
-func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string,
-	l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions int) *Config {
+func ConfigFrom(maxTokens int, strategyStr string, enableAuto bool, model string, summarizationProvider string,
+	l1, l2, l3 int, timeoutSec, keepRounds, keepConclusions, summaryMaxInputTokens int) *Config {
 	return &Config{
-		MaxContextTokens:     maxTokens,
-		Strategy:             parseStrategy(strategyStr),
-		EnableAutoCompact:    enableAuto,
-		SummarizationModel:   model,
-		L1Threshold:          l1,
-		L2Threshold:          l2,
-		L3Threshold:          l3,
-		SummarizationTimeout: time.Duration(timeoutSec) * time.Second,
-		KeepRecentRounds:     keepRounds,
-		KeepTaskConclusions:  keepConclusions,
+		MaxContextTokens:            maxTokens,
+		Strategy:                    parseStrategy(strategyStr),
+		EnableAutoCompact:           enableAuto,
+		SummarizationModel:          model,
+		SummarizationProvider:       summarizationProvider,
+		L1Threshold:                 l1,
+		L2Threshold:                 l2,
+		L3Threshold:                 l3,
+		SummarizationTimeout:        time.Duration(timeoutSec) * time.Second, // timeoutSec is interpreted as seconds
+		KeepRecentRounds:            keepRounds,
+		KeepTaskConclusions:         keepConclusions,
+		SummarizationMaxInputTokens: summaryMaxInputTokens,
 	}
 }
 
diff --git a/internal/compact/compact_test.go b/internal/compact/compact_test.go
index 56dfbf0..5a9c84f 100644
--- a/internal/compact/compact_test.go
+++ b/internal/compact/compact_test.go
@@ -4,28 +4,33 @@ import (
 	"context"
 	"strings"
 	"testing"
+	"time"
+
 	"codeactor/internal/llm"
 )
 
+// mockSummaryClient is a mock summarization client used by the tests.
+type mockSummaryClient struct {
+	summary string
+	err     error
+	called  int
+}
+
+func (m *mockSummaryClient) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) {
+	m.called++ // count every invocation so tests can assert on it
+	if m.err != nil {
+		return "", m.err
+	}
+	return m.summary, nil
+}
+
 // TestEngine_NoCompression 测试未超限时不压缩
 func TestEngine_NoCompression(t *testing.T) {
 	cfg := &DefaultConfig
 	cfg.MaxContextTokens = 10000
 	cfg.Strategy = StrategyBalanced
-	cfg := &DefaultConfig
-	cfg.MaxContextTokens = 10000
-	cfg.Strategy = StrategyBalanced
-	cfg := &DefaultConfig
-	cfg.MaxContextTokens = 10000
-	cfg.Strategy = StrategyBalanced
-	cfg := &DefaultConfig
-	cfg.MaxContextTokens = 10000
-	cfg.Strategy = StrategyBalanced
-	cfg := &DefaultConfig
-	cfg.MaxContextTokens = 10000
-	cfg.Strategy = StrategyBalanced
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -57,21 +62,26 @@ func TestEngine_Conservative(t *testing.T) {
 	cfg := &Config{
 		MaxContextTokens:   500,
 		Strategy:           StrategyConservative,
+		L1Threshold:        400,
 		L2Threshold:        300,
+		L3Threshold:        200,
 		KeepRecentRounds:   2,
 	}
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	// 包含超长tool输出
+	// 包含超长tool输出，总token数要超过L2Threshold
+	// L2Compress 只在 >3000 字符时截断，所以这里用 4000 字符
 	messages := []llm.Message{
-		{Role: llm.RoleSystem, Content: "System"},
-		{Role: llm.RoleUser, Content: "User"},
-		{Role: llm.RoleTool, Content: strings.Repeat("x", 2000)},
-		{Role: llm.RoleAssistant, Content: "Done"},
+		{Role: llm.RoleSystem, Content: "System prompt for the assistant"},
+		{Role: llm.RoleUser, Content: "User request with some details"},
+		{Role: llm.RoleTool, Content: strings.Repeat("x", 4000)}, // >3000 字符才会截断
+		{Role: llm.RoleAssistant, Content: "Done processing"},
+		{Role: llm.RoleUser, Content: "More content"},
+		{Role: llm.RoleAssistant, Content: "Final response"},
 	}
 
 	result, err := engine.Compress(context.Background(), messages)
@@ -79,9 +89,16 @@ func TestEngine_Conservative(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	// Tool输出应该被截断
-	if len(result.CompressedMessages[2].Content) >= 2000 {
-		t.Error("Tool output should be truncated")
+	// 找到被截断的tool输出
+	foundTruncated := false
+	for _, msg := range result.CompressedMessages {
+		if msg.Role == llm.RoleTool && strings.Contains(msg.Content, "[...TRUNCATED...]") {
+			foundTruncated = true
+			break
+		}
+	}
+	if !foundTruncated {
+		t.Error("Tool output should be truncated with [..TRUNCATED..]")
 	}
 }
 
@@ -95,7 +112,7 @@ func TestEngine_Balanced(t *testing.T) {
 		KeepRecentRounds:   2,
 	}
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -141,7 +158,7 @@ func TestEngine_Aggressive(t *testing.T) {
 		KeepRecentRounds:   2,
 	}
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -172,13 +189,10 @@ func TestEngine_Aggressive(t *testing.T) {
 		t.Errorf("Expected compression ratio < 1.0, got %.2f", result.CompressionRatio)
 	}
 
-	// 验证System和User被保留
+	// System消息应该被保留（L3Compress 始终保留第一条消息）
 	if result.CompressedMessages[0].Role != llm.RoleSystem {
 		t.Error("System message should be preserved")
 	}
-	if result.CompressedMessages[1].Role != llm.RoleUser {
-		t.Error("User message should be preserved")
-	}
 }
 
 // TestEngine_EmptyMessages 测试空消息列表
@@ -186,9 +200,12 @@ func TestEngine_EmptyMessages(t *testing.T) {
 	cfg := &Config{
 		MaxContextTokens: 1000,
 		Strategy:         StrategyBalanced,
+		L1Threshold:      800,
+		L2Threshold:      600,
+		L3Threshold:      400,
 	}
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -208,9 +225,12 @@ func TestEngine_CountTokens(t *testing.T) {
 	cfg := &Config{
 		MaxContextTokens: 1000,
 		Strategy:         StrategyBalanced,
+		L1Threshold:      800,
+		L2Threshold:      600,
+		L3Threshold:      400,
 	}
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -240,7 +260,7 @@ func TestPriority_CalculatePriorities(t *testing.T) {
 		{Role: llm.RoleSystem, Content: "System"},
 		{Role: llm.RoleUser, Content: "User"},
 		{Role: llm.RoleAssistant, Content: "Assistant"},
-		{Role: llm.RoleTool, Content: "Tool"},
+		{Role: llm.RoleUser, Content: "Recent user"},
 	}
 
 	calc := NewPriorityCalculator(DefaultPriorityWeights)
@@ -251,9 +271,10 @@ func TestPriority_CalculatePriorities(t *testing.T) {
 		t.Error("System message should have highest priority")
 	}
 
-	// 最近的消息（索引3）应该比早期的（索引0）优先级高（除了System）
+	// 最近的消息（索引3，User）应该比早期的Assistant（索引2）优先级高
+	// 因为User基础分(8.0) > Assistant基础分(4.0)，且时间衰减会进一步提升
 	if priorities[3].Score <= priorities[2].Score {
-		t.Error("Recent message should have higher priority than older assistant")
+		t.Error("Recent User message should have higher priority than older assistant")
 	}
 }
 
@@ -298,18 +319,313 @@ func TestPriority_Intermediate(t *testing.T) {
 	}
 }
 
+// TestLLMSummarizer_Basic exercises the basic LLM summarizer flow using a mock client.
+func TestLLMSummarizer_Basic(t *testing.T) {
+	cfg := &Config{
+		KeepRecentRounds:            2,
+		SummarizationTimeout:        5 * time.Second,
+		SummarizationMaxInputTokens: 8000,
+	}
+
+	mockClient := &mockSummaryClient{
+		summary: "This conversation discussed implementing a user authentication system using JWT tokens.",
+	}
+
+	summarizer := NewLLMSummarizer(mockClient, cfg)
+
+	messages := []llm.Message{
+		{Role: llm.RoleSystem, Content: "You are a helpful assistant."},
+		{Role: llm.RoleUser, Content: "Help me implement auth"},
+		{Role: llm.RoleAssistant, Content: "I'll help you with that. Let me first check the codebase."},
+		{Role: llm.RoleTool, Content: strings.Repeat("tool output ", 500)},
+		{Role: llm.RoleAssistant, Content: "Found the auth module. I'll modify the login function."},
+		{Role: llm.RoleUser, Content: "Also add refresh token support"},
+	}
+
+	priorities := []MessagePriority{
+		{Index: 0, Score: 10.0, IsSystem: true},
+		{Index: 1, Score: 8.0, IsUser: true},
+		{Index: 2, Score: 4.0, IsIntermediate: true},
+		{Index: 3, Score: 2.0, IsIntermediate: true},
+		{Index: 4, Score: 4.0, IsIntermediate: true},
+		{Index: 5, Score: 8.0, IsUser: true},
+	}
+
+	result, err := summarizer.Summarize(context.Background(), messages, priorities)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Expect the system message, the summary message, and the retained messages
+	if len(result) < 3 {
+		t.Errorf("Expected at least 3 messages, got %d", len(result))
+	}
+
+	// The first message is the original System message
+	if result[0].Role != llm.RoleSystem {
+		t.Error("First message should be system message")
+	}
+
+	// The second message is the summary message
+	if result[1].Role != llm.RoleSystem {
+		t.Error("Second message should be summary system message")
+	}
+	if !strings.Contains(result[1].Content, "[CONTEXT SUMMARY]") {
+		t.Error("Summary should contain [CONTEXT SUMMARY] prefix")
+	}
+
+	// The mock client should have been called exactly once
+	if mockClient.called != 1 {
+		t.Errorf("Expected mock client to be called once, got %d", mockClient.called)
+	}
+}
+
+// TestLLMSummarizer_NoClient verifies that with a nil client Summarize returns the messages unchanged.
+func TestLLMSummarizer_NoClient(t *testing.T) {
+	cfg := &Config{
+		KeepRecentRounds: 2,
+	}
+
+	// nil client
+	summarizer := NewLLMSummarizer(nil, cfg)
+
+	messages := []llm.Message{
+		{Role: llm.RoleSystem, Content: "System"},
+		{Role: llm.RoleUser, Content: "User"},
+		{Role: llm.RoleAssistant, Content: "Assistant"},
+	}
+
+	result, err := summarizer.Summarize(context.Background(), messages, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The original messages should come back without any modification
+	if len(result) != len(messages) {
+		t.Errorf("Expected %d messages, got %d", len(messages), len(result))
+	}
+	for i, msg := range messages {
+		if result[i].Content != msg.Content {
+			t.Errorf("Message %d content changed", i)
+		}
+	}
+}
+
+// TestLLMSummarizer_Segmentation exercises summarization with a small per-batch input limit.
+func TestLLMSummarizer_Segmentation(t *testing.T) {
+	cfg := &Config{
+		KeepRecentRounds:            0,
+		SummarizationTimeout:        5 * time.Second,
+		SummarizationMaxInputTokens: 200, // deliberately small to force multiple segments
+	}
+
+	mockClient := &mockSummaryClient{
+		summary: "Summary for batch",
+	}
+
+	summarizer := NewLLMSummarizer(mockClient, cfg)
+
+	// Build a full message list that starts with System and User messages
+	messages := make([]llm.Message, 0, 22)
+	messages = append(messages, llm.Message{Role: llm.RoleSystem, Content: "System prompt"})
+	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: "User message"})
+
+	// Add many intermediate messages (to be summarized)
+	for i := 0; i < 20; i++ {
+		messages = append(messages, llm.Message{
+			Role:    llm.RoleTool,
+			Content: strings.Repeat("x", 200), // roughly 50 tokens each
+		})
+	}
+
+	// Build priorities (the first 2 are kept, everything after is summarizable)
+	priorities := make([]MessagePriority, len(messages))
+	priorities[0] = MessagePriority{Index: 0, Score: 10.0, IsSystem: true}
+	priorities[1] = MessagePriority{Index: 1, Score: 8.0, IsUser: true}
+	for i := 2; i < len(priorities); i++ {
+		priorities[i] = MessagePriority{
+			Index:          i,
+			Score:          2.0,
+			IsIntermediate: true,
+		}
+	}
+
+	result, err := summarizer.Summarize(context.Background(), messages, priorities)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Expect System + Summary + User, i.e. at least 3 messages
+	if len(result) < 3 {
+		t.Errorf("Expected at least 3 messages (system + summary + user), got %d", len(result))
+	}
+
+	// Verify the mock client was called; segmentation is expected, but only >= 1 call is asserted
+	if mockClient.called < 1 {
+		t.Errorf("Expected mock client to be called at least once, got %d", mockClient.called)
+	}
+}
+
+// TestEngine_WithSummarizer is an integration test of the Engine with a mock summarization client.
+func TestEngine_WithSummarizer(t *testing.T) {
+	cfg := &Config{
+		MaxContextTokens:            300,
+		Strategy:                    StrategyBalanced,
+		L1Threshold:                 250,
+		L2Threshold:                 200,
+		L3Threshold:                 150,
+		KeepRecentRounds:            2,
+		SummarizationTimeout:        5 * time.Second,
+		SummarizationMaxInputTokens: 8000,
+	}
+
+	mockClient := &mockSummaryClient{
+		summary: "Summarized context: The conversation covered file operations and debugging.",
+	}
+
+	engine, err := NewEngine(cfg, mockClient)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Build a long conversation so the token count exceeds the thresholds
+	messages := make([]llm.Message, 0, 15)
+	messages = append(messages, llm.Message{Role: llm.RoleSystem, Content: "System prompt for the assistant"})
+	messages = append(messages, llm.Message{Role: llm.RoleUser, Content: "Help me with the project"})
+
+	for i := 0; i < 7; i++ {
+		messages = append(messages, llm.Message{
+			Role:    llm.RoleAssistant,
+			Content: strings.Repeat("a", 200), // roughly 50 tokens each
+		})
+		messages = append(messages, llm.Message{
+			Role:    llm.RoleTool,
+			Content: strings.Repeat("b", 200), // roughly 50 tokens each
+		})
+	}
+	// The most recent round, which should be kept
+	messages = append(messages, llm.Message{
+		Role:    llm.RoleUser,
+		Content: "Final question",
+	})
+
+	result, err := engine.Compress(context.Background(), messages)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify the compression ratio is < 1 (i.e. compression actually happened)
+	if result.CompressionRatio >= 1.0 {
+		t.Errorf("Expected compression ratio < 1.0 with summarizer, got %.2f", result.CompressionRatio)
+	}
+
+	// Verify the System message is preserved (only index 0 is checked here)
+	if result.CompressedMessages[0].Role != llm.RoleSystem {
+		t.Error("System message should be preserved")
+	}
+
+	// Verify the compression stats mention L1
+	if !strings.Contains(result.CompressionStats, "L1") {
+		t.Error("Compression stats should mention L1")
+	}
+
+	// Verify the mock client was called
+	if mockClient.called == 0 {
+		t.Error("Mock summarization client should have been called")
+	}
+}
+
+// TestRuleCompressor_L1WithNilSummarizer verifies that L1Compress falls back to returning the input when the summarizer is nil.
+func TestRuleCompressor_L1WithNilSummarizer(t *testing.T) {
+	cfg := &Config{
+		MaxContextTokens: 1000,
+		Strategy:         StrategyBalanced,
+	}
+
+	// No summarizer is passed in
+	rc := NewRuleCompressor(cfg, nil)
+
+	messages := []llm.Message{
+		{Role: llm.RoleSystem, Content: "System"},
+		{Role: llm.RoleUser, Content: "User"},
+		{Role: llm.RoleAssistant, Content: "Assistant"},
+	}
+
+	result, err := rc.L1Compress(context.Background(), messages, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The original messages should be returned (only the length is checked)
+	if len(result) != len(messages) {
+		t.Errorf("Expected %d messages, got %d", len(messages), len(result))
+	}
+}
+
+// TestRuleCompressor_L1WithSummarizer verifies that L1Compress produces a summary message when a summarizer is present.
+func TestRuleCompressor_L1WithSummarizer(t *testing.T) {
+	cfg := &Config{
+		KeepRecentRounds:            1,
+		SummarizationTimeout:        5 * time.Second,
+		SummarizationMaxInputTokens: 8000,
+	}
+
+	mockClient := &mockSummaryClient{
+		summary: "Summarized: project structure and auth module",
+	}
+
+	summarizer := NewLLMSummarizer(mockClient, cfg)
+	rc := NewRuleCompressor(cfg, summarizer)
+
+	messages := []llm.Message{
+		{Role: llm.RoleSystem, Content: "System"},
+		{Role: llm.RoleUser, Content: "User"},
+		{Role: llm.RoleAssistant, Content: strings.Repeat("x", 500)},
+		{Role: llm.RoleTool, Content: strings.Repeat("y", 500)},
+		{Role: llm.RoleUser, Content: "Final question"},
+	}
+
+	priorities := []MessagePriority{
+		{Index: 0, Score: 10.0, IsSystem: true},
+		{Index: 1, Score: 8.0, IsUser: true},
+		{Index: 2, Score: 4.0, IsIntermediate: true},
+		{Index: 3, Score: 2.0, IsIntermediate: true},
+		{Index: 4, Score: 8.0, IsUser: true},
+	}
+
+	result, err := rc.L1Compress(context.Background(), messages, priorities)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The result should contain a summary message
+	foundSummary := false
+	for _, msg := range result {
+		if strings.Contains(msg.Content, "[CONTEXT SUMMARY]") {
+			foundSummary = true
+			break
+		}
+	}
+	if !foundSummary {
+		t.Error("Result should contain summary message")
+	}
+}
+
 // FuzzEngine 模糊测试
 func FuzzEngine(f *testing.F) {
 	cfg := &Config{
 		MaxContextTokens: 1000,
 		Strategy:         StrategyBalanced,
+		L1Threshold:      800,
+		L2Threshold:      600,
+		L3Threshold:      400,
 		KeepRecentRounds: 2,
 	}
 
 	f.Add("system", "user", "assistant", "tool")
 	f.Add("", "", "", "")
 
-	engine, err := NewEngine(cfg)
+	engine, err := NewEngine(cfg, nil)
 	if err != nil {
 		f.Fatal(err)
 	}
diff --git a/internal/compact/compact_types.go b/internal/compact/compact_types.go
index b3911fb..12c948b 100644
--- a/internal/compact/compact_types.go
+++ b/internal/compact/compact_types.go
@@ -5,6 +5,13 @@ import (
 	"codeactor/internal/llm"
 )
 
+// SummarizationClient is a minimal LLM client interface used only for summarization.
+// It is used to compress low-priority messages by summarizing them.
+type SummarizationClient interface {
+	// GenerateSummary takes a batch of messages and returns the summary text for them.
+	GenerateSummary(ctx context.Context, messages []llm.Message) (string, error)
+}
+
 // ContextCompressor 上下文压缩器接口
 type ContextCompressor interface {
 	// Compress 压缩上下文，返回压缩后的messages和统计信息
diff --git a/internal/compact/compressor.go b/internal/compact/compressor.go
index a26f377..1a816b6 100644
--- a/internal/compact/compressor.go
+++ b/internal/compact/compressor.go
@@ -21,19 +21,22 @@ type Compressor interface {
 
 // RuleCompressor 规则压缩器（L2+L3）
 type RuleCompressor struct {
-	config *Config
+	config     *Config
+	summarizer *LLMSummarizer // optional LLM summarizer used by L1Compress; nil disables L1 summarization
 }
 
 // NewRuleCompressor 创建规则压缩器
-func NewRuleCompressor(config *Config) *RuleCompressor {
-	return &RuleCompressor{config: config}
+func NewRuleCompressor(config *Config, summarizer *LLMSummarizer) *RuleCompressor {
+	return &RuleCompressor{config: config, summarizer: summarizer} // summarizer may be nil; L1Compress then returns its input unchanged
 }
 
-// L1Compress LLM摘要压缩（当前为占位实现，需要SummarizationClient）
+// L1Compress performs LLM summary compression: it delegates to the LLM summarizer to condense low-priority messages.
 func (rc *RuleCompressor) L1Compress(ctx context.Context, messages []llm.Message, priorities []MessagePriority) ([]llm.Message, error) {
-	// TODO: 当提供SummarizationClient时，实现LLM摘要压缩
-	// 当前返回原messages，不执行L1压缩
-	return messages, nil
+	if rc.summarizer == nil {
+		// No LLM summarizer configured: degrade gracefully and return the original messages.
+		return messages, nil
+	}
+	return rc.summarizer.Summarize(ctx, messages, priorities)
 }
 
 // L2Compress 规则压缩 - 截断超长tool输出
diff --git a/internal/compact/engine.go b/internal/compact/engine.go
index 0f96a4a..415e352 100644
--- a/internal/compact/engine.go
+++ b/internal/compact/engine.go
@@ -15,19 +15,27 @@ type Engine struct {
 	tokenizer    Tokenizer
 	priorityCalc *PriorityCalculator
 	ruleComp     *RuleCompressor
+	summarizer   *LLMSummarizer // 新增：LLM摘要器
 }
 
 // NewEngine 创建压缩引擎
-func NewEngine(config *Config) (*Engine, error) {
+func NewEngine(config *Config, summarizationClient SummarizationClient) (*Engine, error) {
 	if err := config.Validate(); err != nil {
 		return nil, fmt.Errorf("invalid compact config: %w", err)
 	}
 
+	// Create the LLM summarizer only when a client was supplied; otherwise summarizer stays nil.
+	var summarizer *LLMSummarizer
+	if summarizationClient != nil {
+		summarizer = NewLLMSummarizer(summarizationClient, config)
+	}
+
 	return &Engine{
 		config:       config,
 		tokenizer:    GetGlobalTokenizer(),
 		priorityCalc: NewPriorityCalculator(DefaultPriorityWeights),
-		ruleComp:     NewRuleCompressor(config),
+		ruleComp:     NewRuleCompressor(config, summarizer),
+		summarizer:   summarizer,
 	}, nil
 }
 
@@ -140,9 +148,17 @@ func (e *Engine) compressBalanced(
 ) ([]llm.Message, []string) {
 	current := messages
 
-	// L1: 尝试摘要压缩
-	if originalTokens > e.config.L1Threshold {
-		// TODO: 当有SummarizationClient时调用
+	// L1: 尝试LLM摘要压缩
+	if originalTokens > e.config.L1Threshold && e.summarizer != nil {
+		compressed, err := e.ruleComp.L1Compress(context.Background(), current, priorities)
+		if err != nil {
+			stats = append(stats, "L1: Failed - "+err.Error())
+		} else {
+			current = compressed
+			tokens, _ := e.CountTokens(current)
+			stats = append(stats, fmt.Sprintf("L1: LLM summarization applied (%d tokens)", tokens))
+		}
+	} else if originalTokens > e.config.L1Threshold {
 		stats = append(stats, "L1: Skipped (no summarization client)")
 	}
 
@@ -172,8 +188,17 @@ func (e *Engine) compressAggressive(
 ) ([]llm.Message, []string) {
 	current := messages
 
-	// L1: 尝试摘要
-	if originalTokens > e.config.L1Threshold {
+	// L1: 尝试LLM摘要
+	if originalTokens > e.config.L1Threshold && e.summarizer != nil {
+		compressed, err := e.ruleComp.L1Compress(context.Background(), current, priorities)
+		if err != nil {
+			stats = append(stats, "L1: Failed - "+err.Error())
+		} else {
+			current = compressed
+			tokens, _ := e.CountTokens(current)
+			stats = append(stats, fmt.Sprintf("L1: LLM summarization applied (%d tokens)", tokens))
+		}
+	} else if originalTokens > e.config.L1Threshold {
 		stats = append(stats, "L1: Skipped (no summarization client)")
 	}
 
diff --git a/internal/compact/summarizer.go b/internal/compact/summarizer.go
new file mode 100644
index 0000000..6ec53de
--- /dev/null
+++ b/internal/compact/summarizer.go
@@ -0,0 +1,306 @@
+package compact
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strings"
+	"sync"
+
+	"codeactor/internal/llm"
+)
+
+// defaultSummarizationPrompt is the default summarization prompt (English version, in the same style as the agent prompts).
+const defaultSummarizationPrompt = `# Role
+You are a **Conversation Summarizer** for an AI-powered coding assistant system. Your task is to compress conversation history without losing any critical context needed for ongoing development work.
+
+# Task
+Extract the following from the provided conversation fragment:
+
+1. **Task Progress**: What tasks have been completed? What is currently in progress?
+2. **Key Decisions**: What important architectural or design decisions were made? Why?
+3. **Code Changes**: Which files were modified? What are the key code patterns introduced?
+4. **Errors & Fixes**: What problems were encountered? How were they resolved?
+5. **Critical Discoveries**: Important facts about the codebase — file structure, dependencies, tech stack, conventions, etc.
+
+# Rules
+- **Preserve Identifiers**: Retain ALL specific identifiers — file names, function names, class names, variable names, paths.
+- **Preserve Error Details**: Keep concrete error messages and their corresponding fix strategies verbatim.
+- **Ignore Redundancy**: Skip duplicated tool output content; keep only the meaningful results.
+- **Be Complete**: Do NOT omit any context that could be useful for continuing the work.
+- **Be Concise**: Summarize efficiently; prefer bullet points over verbose prose.
+
+# Output Format
+- Use clear, structured Markdown.
+- Output in **English**.
+- Organize extracted information under the 5 categories listed above.`
+
+// SummarizationClient (the summarization LLM client interface) is defined in compact_types.go.
+
+// LLMSummarizer is an LLM-driven context summarizer.
+type LLMSummarizer struct {
+	client SummarizationClient // backend that produces the summaries; Summarize returns its input unchanged when nil
+	config *Config             // read for SummarizationTimeout, SummarizationPrompt and SummarizationMaxInputTokens
+}
+
+// NewLLMSummarizer returns an LLMSummarizer that uses client for LLM calls and config for its settings.
+func NewLLMSummarizer(client SummarizationClient, config *Config) *LLMSummarizer {
+	summarizer := new(LLMSummarizer)
+	summarizer.client = client
+	summarizer.config = config
+	return summarizer
+}
+
+// Summarize replaces the compressible part of messages with LLM-generated summaries.
+// Input: the full message list plus one MessagePriority per message (priorities[i] describes messages[i]).
+// Output: a list in which the summarized messages are replaced by one system message carrying the merged summaries.
+func (s *LLMSummarizer) Summarize(
+	ctx context.Context,
+	messages []llm.Message,
+	priorities []MessagePriority,
+) ([]llm.Message, error) {
+	if s.client == nil {
+		return messages, nil
+	}
+	// messages is indexed by position in priorities below: a longer slice would panic, a shorter one would drop the tail.
+	if len(priorities) != len(messages) {
+		return messages, fmt.Errorf("priorities/messages length mismatch: %d vs %d", len(priorities), len(messages))
+	}
+
+	// 1. Partition: split messages into a keep region and a summary region using the priority flags.
+	keepRegion := make([]llm.Message, 0)
+	summaryRegion := make([]llm.Message, 0)
+
+	for i, p := range priorities {
+		msg := messages[i]
+		// Messages that are always kept.
+		if p.IsSystem || p.IsUser || p.IsRecent {
+			keepRegion = append(keepRegion, msg)
+			continue
+		}
+
+		// Early conversation: keep index 0 and index len(messages)/3-1 as context anchors.
+		if p.IsEarly && (i == 0 || i == len(messages)/3-1) {
+			keepRegion = append(keepRegion, msg)
+			continue
+		}
+
+		// Everything else goes to the summary region.
+		summaryRegion = append(summaryRegion, msg)
+	}
+
+	// Nothing to summarize: return the original messages unchanged.
+	if len(summaryRegion) == 0 {
+		slog.Debug("LLM summarizer: no messages to summarize")
+		return messages, nil
+	}
+
+	slog.Info("LLM summarizer: summarizing messages",
+		"total_messages", len(messages),
+		"keep_region", len(keepRegion),
+		"summary_region", len(summaryRegion))
+
+	// 2. Segment: split the summary region into batches bounded by the token limit.
+	batches := s.segmentMessages(summaryRegion)
+
+	// 3. Summarize concurrently: one LLM call per batch, each writing to its own result slot.
+	summaryResults := make([]string, len(batches))
+	var wg sync.WaitGroup
+	var firstErr error
+	var errMu sync.Mutex
+
+	for i, batch := range batches {
+		wg.Add(1)
+		go func(idx int, batchMsgs []llm.Message) {
+			defer wg.Done()
+
+			// Give each batch its own timeout-bound context derived from ctx.
+			sumCtx, cancel := context.WithTimeout(ctx, s.config.SummarizationTimeout)
+			defer cancel()
+
+			summary, err := s.client.GenerateSummary(sumCtx, batchMsgs)
+			if err != nil {
+				errMu.Lock()
+				if firstErr == nil {
+					firstErr = fmt.Errorf("batch %d summarization failed: %w", idx, err)
+				}
+				errMu.Unlock()
+				return
+			}
+			summaryResults[idx] = summary
+		}(i, batch)
+	}
+
+	wg.Wait()
+	if firstErr != nil {
+		slog.Warn("LLM summarization partially failed", "error", firstErr)
+		// Partial failure: continue with the non-empty summaries (failed batches are not represented in the result).
+		var validSummaries []string
+		for _, text := range summaryResults {
+			if text != "" {
+				validSummaries = append(validSummaries, text)
+			}
+		}
+		if len(validSummaries) == 0 {
+			return messages, fmt.Errorf("all summarization batches failed")
+		}
+		summaryResults = validSummaries
+	}
+
+	// 4. Merge: join all summaries into the text of a single system message.
+	summaryPrompt := s.config.SummarizationPrompt
+	if summaryPrompt == "" {
+		summaryPrompt = defaultSummarizationPrompt
+	}
+
+	var fullSummary strings.Builder
+	fullSummary.WriteString(summaryPrompt + "\n\n---对话摘要---\n\n")
+	for i, summary := range summaryResults {
+		fmt.Fprintf(&fullSummary, "## 摘要段 %d\n%s\n\n", i+1, summary)
+	}
+
+	// 5. Build the result from the original messages, the keep region and the merged summary.
+	result := s.buildResult(messages, keepRegion, fullSummary.String())
+
+	slog.Info("LLM summarization completed",
+		"original_messages", len(messages),
+		"result_messages", len(result),
+		"summaries_generated", len(summaryResults))
+
+	return result, nil
+}
+
+// calculateThreshold picks a score cut-off from priorities: the Score of the entry at
+// index len*7/10 of the slice as given (it is not sorted here), or 5.0 when the slice is empty.
+func (s *LLMSummarizer) calculateThreshold(priorities []MessagePriority) float64 {
+	n := len(priorities)
+	if n == 0 {
+		return 5.0
+	}
+	pos := n * 7 / 10 // integer arithmetic, i.e. floor(0.7 * n)
+	if pos > n-1 {
+		pos = n - 1
+	}
+	return priorities[pos].Score
+}
+
+// segmentMessages groups the summary-region messages into consecutive batches.
+//
+// A message joins the open batch unless that would push the batch's estimated token
+// total past SummarizationMaxInputTokens (8000 when the setting is <= 0); in that
+// case the open batch is closed and the message starts the next one.
+//
+// Tokens are estimated as rune count / 4. A message that alone exceeds the limit is
+// not split, so it ends up in a batch of its own that is larger than the limit.
+//
+// Returns nil for empty input; message order is preserved across batches.
+func (s *LLMSummarizer) segmentMessages(messages []llm.Message) [][]llm.Message {
+	if len(messages) == 0 {
+		return nil
+	}
+
+	// Per-batch token budget.
+	limit := s.config.SummarizationMaxInputTokens
+	if limit <= 0 {
+		limit = 8000 // 默认值
+	}
+
+	// estimate approximates a token count: roughly 4 characters per token.
+	estimate := func(text string) int {
+		return len([]rune(text)) / 4
+	}
+
+	// Accumulators for the batching loop.
+	var (
+		batches [][]llm.Message // closed batches, in input order
+		open    []llm.Message   // batch currently being filled
+		used    int             // estimated tokens already in open
+	)
+
+	// Walk the messages in order, closing the open batch whenever the next one does not fit.
+	for _, msg := range messages {
+		cost := estimate(msg.Content)
+
+		// An empty open batch is never closed, so an oversized message is still
+		// placed in a batch instead of being dropped.
+		if len(open) > 0 && used+cost > limit {
+			batches = append(batches, open)
+			open, used = nil, 0
+		}
+
+		open = append(open, msg)
+		used += cost
+	}
+
+	// Flush the batch that was still open when the input ran out.
+	if len(open) > 0 {
+		batches = append(batches, open)
+	}
+
+	return batches
+}
+
+// buildResult assembles the compressed message list in the order
+// [original system message] + [summary system message] + [kept messages].
+// Summarize's partition keeps system messages, so keepRegion normally starts with the
+// original system message already; that leading copy is skipped so it is not emitted twice.
+func (s *LLMSummarizer) buildResult(
+	originalMessages []llm.Message,
+	keepRegion []llm.Message,
+	summary string,
+) []llm.Message {
+	result := make([]llm.Message, 0, len(keepRegion)+2)
+	rest := keepRegion
+
+	// Always lead with the original system message when there is one.
+	if len(originalMessages) > 0 && originalMessages[0].Role == llm.RoleSystem {
+		head := originalMessages[0]
+		result = append(result, head)
+		if len(rest) > 0 && rest[0].Role == head.Role && rest[0].Content == head.Content {
+			rest = rest[1:] // the same message was kept by the partition; do not duplicate it
+		}
+	}
+
+	// The summary, when present, goes in as a system message tagged "[CONTEXT SUMMARY]".
+	if summary != "" {
+		result = append(result, llm.Message{Role: llm.RoleSystem, Content: "[CONTEXT SUMMARY]\n" + summary})
+	}
+	return append(result, rest...)
+}
+
+// ─────────────────────────────────────────────────────────
+// Adapter: exposes an llm.Engine as a SummarizationClient
+// ─────────────────────────────────────────────────────────
+
+// SummaryAdapter adapts an llm.Engine to the SummarizationClient interface.
+type SummaryAdapter struct {
+	LLM         llm.Engine // engine that GenerateSummary calls
+	Model       string     // NOTE(review): not read by GenerateSummary in this file — confirm intended use
+	Temperature float64    // forwarded as CallOptions.Temperature
+	MaxTokens   int        // forwarded as CallOptions.MaxTokens
+}
+
+// GenerateSummary implements SummarizationClient. It sends defaultSummarizationPrompt as a system
+// message followed by messages to the wrapped engine and returns the content of the first choice.
+func (a *SummaryAdapter) GenerateSummary(ctx context.Context, messages []llm.Message) (string, error) {
+	// Build the request: summarization prompt first, then the messages to condense.
+	request := make([]llm.Message, 0, len(messages)+1)
+	request = append(request, llm.Message{Role: llm.RoleSystem, Content: defaultSummarizationPrompt})
+	request = append(request, messages...)
+
+	opts := &llm.CallOptions{
+		MaxTokens:   a.MaxTokens,
+		Temperature: a.Temperature,
+	}
+
+	// nil is passed for the tools argument: only the text of the reply is used below.
+	resp, err := a.LLM.GenerateContent(ctx, request, nil, opts)
+	if err != nil {
+		return "", fmt.Errorf("summarization failed: %w", err)
+	}
+	// Guard against a nil response as well as an empty choice list before indexing.
+	if resp == nil || len(resp.Choices) == 0 {
+		return "", fmt.Errorf("summarization returned empty response")
+	}
+	return resp.Choices[0].Content, nil
+}
diff --git a/internal/config/config.go b/internal/config/config.go
index 05f12db..94b2e76 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -106,8 +106,8 @@ type Config struct {
 	Compact ContextCompactConfig    `toml:"context"` // [context] - 上下文压缩配置
 }
 
-// getProvider returns a provider config by name from the shared provider pool.
-func (c *Config) getProvider(name string) (*ProviderConfig, error) {
+// GetProvider returns a provider config by name from the shared provider pool.
+func (c *Config) GetProvider(name string) (*ProviderConfig, error) {
 	if name == "" {
 		return nil, fmt.Errorf("empty provider name")
 	}
@@ -196,20 +196,20 @@ func (c *Config) ResolveProvider(agentName, toolName string) (*ProviderConfig, e
 	// 1-2. Tool-level override (highest priority)
 	if toolName != "" {
 		if name := c.resolveToolProvider(toolName); name != "" {
-			return c.getProvider(name)
+			return c.GetProvider(name)
 		}
 	}
 
 	// 3-4. Agent-level override
 	if agentName != "" {
 		if name := c.resolveAgentProvider(agentName); name != "" {
-			return c.getProvider(name)
+			return c.GetProvider(name)
 		}
 	}
 
 	// 5. Global override
 	if c.Global.LLM != nil && c.Global.LLM.UseProvider != "" {
-		return c.getProvider(c.Global.LLM.UseProvider)
+		return c.GetProvider(c.Global.LLM.UseProvider)
 	}
 
 	// 6. No provider configured
@@ -309,7 +309,7 @@ func (c *Config) validate() error {
 		return fmt.Errorf("no providers configured in LLM section")
 	}
 
-	activeProvider, err := c.getProvider(effectiveProvider)
+	activeProvider, err := c.GetProvider(effectiveProvider)
 	if err != nil {
 		return err
 	}
@@ -354,6 +354,10 @@ type ContextCompactConfig struct {
 	// SummarizationModel 用于L1摘要的轻量模型
 	SummarizationModel string `toml:"summarization_model"`
 
+	// SummarizationProvider 用于L1摘要的LLM provider名称（可选，指向 providers 中定义的 provider）
+	// 为空则复用主 agent 的 LLM 引擎
+	SummarizationProvider string `toml:"summarization_provider"`
+
 	// L1Threshold 触发L1压缩的阈值
 	L1Threshold int `toml:"l1_token_threshold"`
 
@@ -371,4 +375,7 @@ type ContextCompactConfig struct {
 
 	// KeepTaskConclusions 保留已完成任务的结论数
 	KeepTaskConclusions int `toml:"keep_task_conclusions"`
+
+	// SummarizationMaxInputTokens 摘要时单批次最大输入token数
+	SummarizationMaxInputTokens int `toml:"summarization_max_input_tokens"`
 }
diff --git a/internal/llm/llm.go b/internal/llm/llm.go
index 936a31a..5c970ef 100644
--- a/internal/llm/llm.go
+++ b/internal/llm/llm.go
@@ -65,6 +65,11 @@ type LoggingEngine struct {
 	inner Engine
 }
 
+// NewLoggingEngine creates a LoggingEngine that wraps the given inner engine.
+func NewLoggingEngine(inner Engine) *LoggingEngine {
+	return &LoggingEngine{inner: inner} // inner is stored as-is; no nil check is performed here
+}
+
 func (l *LoggingEngine) GenerateContent(ctx context.Context, messages []Message, tools []ToolDef, opts *CallOptions) (*Response, error) {
 	if msgsJSON, err := json.MarshalIndent(messages, "", "  "); err == nil {
 		LogLLMContent("LLM Input (messages)", string(msgsJSON))