From ba10f4e1719fa443837cbf956e86e3fbbbd317cf Mon Sep 17 00:00:00 2001 From: iohub Date: Wed, 6 May 2026 10:19:04 +0800 Subject: [PATCH 1/4] feat: add command mode with hidden input during task execution - Auto-enable command mode after task submission with minimal UI - Hide input textarea and show compact command prompt with tips - Add keyboard shortcuts: f/b for page up/down, j/k for scroll, i/enter to exit, esc to cancel running task - Auto-disable command mode when task completes - Add i18n translations for command mode prompt and tips (zh/en) --- i18n.go | 17 ++++++-- tui.go | 118 ++++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 107 insertions(+), 28 deletions(-) diff --git a/i18n.go b/i18n.go index 3b8faf9..075bd00 100644 --- a/i18n.go +++ b/i18n.go @@ -44,6 +44,9 @@ type translations struct { HistoryConfirmDelete string // Confirmation dialog ConfirmDialogHelp string + // Command mode + CommandModePrompt string + CommandModeTips string } var langMap = map[Language]translations{ @@ -78,6 +81,8 @@ var langMap = map[Language]translations{ HistoryKeyClearFilter: "ctrl+u: 清除过滤", HistoryConfirmDelete: "确认删除此会话?(y = 确认, 其他键 = 取消)", ConfirmDialogHelp: "←/→ 选择 enter 确认 a 允许 s 全部允许 d/esc 拒绝", + CommandModePrompt: "命令模式", + CommandModeTips: "f:下翻页 b:上翻页 i:输入 esc:取消", }, LangEnglish: { Title: "CodeActor AI Assistant", @@ -109,7 +114,9 @@ var langMap = map[Language]translations{ HistoryKeyBack: "esc: back", HistoryKeyClearFilter: "ctrl+u: clear filter", HistoryConfirmDelete: "Delete this conversation? 
(y = confirm, any other key = cancel)", - ConfirmDialogHelp: "\u2190/\u2192 choose enter confirm a allow s all d/esc deny", + ConfirmDialogHelp: "←/→ choose enter confirm a allow s all d/esc deny", + CommandModePrompt: "COMMAND", + CommandModeTips: "f:pgdn b:pgup i:input esc:cancel", }, } @@ -190,8 +197,12 @@ func (lm *LanguageManager) GetText(key string) string { case "HistoryConfirmDelete": return translations.HistoryConfirmDelete case "ConfirmDialogHelp": - return translations.ConfirmDialogHelp - default: + return translations.ConfirmDialogHelp + case "CommandModePrompt": + return translations.CommandModePrompt + case "CommandModeTips": + return translations.CommandModeTips + default: return fmt.Sprintf("[Missing translation: %s]", key) } } diff --git a/tui.go b/tui.go index e65a4da..c95d833 100644 --- a/tui.go +++ b/tui.go @@ -181,6 +181,9 @@ type model struct { publisher *messaging.MessagePublisher publisherCh chan *messaging.MessagePublisher + // Command mode: input hidden, minimal tips shown (auto-enabled after task submission) + commandMode bool + // Tool call state tracking: tool_call_id → ToolEntry toolCallEntries map[string]*tui.ToolEntry @@ -580,6 +583,55 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { } } + // Command mode key handling (input hidden, minimal keys active) + if m.commandMode && m.taskRunning { + switch msg.String() { + case "ctrl+c": + m.quitting = true + return m, tea.Quit + + case "esc": + // Cancel the currently running task + if m.currentTask != nil && m.currentTask.CancelFunc != nil { + m.currentTask.CancelFunc() + m.logEntries = append(m.logEntries, logEntry{ + timestamp: time.Now(), + eventType: "status", + content: "Task cancelled by user", + }) + m.appendLogEntry(&m.logEntries[len(m.logEntries)-1]) + } + return m, nil + + case "i", "enter": + // Exit command mode, show input for follow-up + m.commandMode = false + return m, nil + + case "f": + m.viewport.PageDown() + return m, nil + + case "b": + m.viewport.PageUp() 
+ return m, nil + + case "j", "down": + m.viewport.LineDown(1) + return m, nil + + case "k", "up": + m.viewport.LineUp(1) + return m, nil + + default: + // Pass to viewport for scrolling + var vpCmd tea.Cmd + m.viewport, vpCmd = m.viewport.Update(msg) + return m, vpCmd + } + } + switch msg.String() { case "ctrl+c": m.quitting = true @@ -719,6 +771,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { case taskCompleteMsg: m.taskRunning = false + m.commandMode = false m.confirmDialog.open = false // safety: close any stale dialog if msg.err != nil { m.errMsg = msg.err.Error() @@ -832,33 +885,46 @@ func (m model) View() string { b.WriteString(logSeparatorStyle.Render(strings.Repeat("─", sepWidth))) b.WriteString("\n") - // Input line (textarea handles its own prompt via PromptFunc) - m.input.SetWidth(m.computeFieldWidth()) - inputLine := m.input.View() - - // Build footer area - var footer strings.Builder - footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(inputLine)) - footer.WriteString("\n") + // Input line / command mode + // Build footer area + var footer strings.Builder + + if m.commandMode && m.taskRunning { + // Command mode: hidden input, minimal prompt with tips + cmdPromptStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("39")).Bold(true) + cmdTipStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("245")) + cmdPrompt := cmdPromptStyle.Render("❯ " + langManager.GetText("CommandModePrompt")) + cmdTips := cmdTipStyle.Render(langManager.GetText("CommandModeTips")) + cmdLine := cmdPrompt + " " + cmdTips + footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(cmdLine)) + footer.WriteString("\n") + } else { + m.input.SetWidth(m.computeFieldWidth()) + inputLine := m.input.View() + footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(inputLine)) + footer.WriteString("\n") + } - // Error message - if m.errMsg != "" { - footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(errorStyle.Render("✖ " + m.errMsg))) - 
footer.WriteString("\n") - } + // Error message + if m.errMsg != "" { + footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(errorStyle.Render("✖ " + m.errMsg))) + footer.WriteString("\n") + } - // Status line: shortcuts + task indicator - taskIndicator := "" - if m.taskRunning { - taskIndicator = logStatusStyle.Render(" ◷ Running...") - } - footer.WriteString("\n") - enterLabel := "ctrl+s submit" - if m.currentTask != nil && !m.taskRunning { - enterLabel = "ctrl+s send" - } - statusLine := footerStyle.Render(enterLabel+" │ ctrl+l lang │ ctrl+h history │ esc cancel │ ctrl+c quit") + taskIndicator - footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(statusLine)) + // Status line: shortcuts + task indicator (hidden in command mode) + if !m.commandMode || !m.taskRunning { + taskIndicator := "" + if m.taskRunning { + taskIndicator = logStatusStyle.Render(" ◷ Running...") + } + footer.WriteString("\n") + enterLabel := "ctrl+s submit" + if m.currentTask != nil && !m.taskRunning { + enterLabel = "ctrl+s send" + } + statusLine := footerStyle.Render(enterLabel+" │ ctrl+l lang │ ctrl+h history │ esc cancel │ ctrl+c quit") + taskIndicator + footer.WriteString(lipgloss.NewStyle().MarginLeft(2).Render(statusLine)) + } b.WriteString(footer.String()) @@ -1355,6 +1421,7 @@ func (m *model) submitTask() tea.Cmd { taskDesc := strings.TrimSpace(m.input.Value()) m.input.SetValue("") m.taskRunning = true + m.commandMode = true m.errMsg = "" ctx, cancel := context.WithCancel(context.Background()) @@ -1392,6 +1459,7 @@ func (m *model) submitTask() tea.Cmd { func (m *model) submitFollowUp(message string) tea.Cmd { m.input.SetValue("") m.taskRunning = true + m.commandMode = true m.errMsg = "" m.logEntries = append(m.logEntries, logEntry{ From 148f41c300ad9f9d1a2bcafca8cf518a4618eba2 Mon Sep 17 00:00:00 2001 From: iohub Date: Wed, 6 May 2026 11:17:48 +0800 Subject: [PATCH 2/4] feat: add DevOps-Agent for operational and system administration tasks --- 
internal/agents/conductor.go | 21 ++++++- internal/agents/conductor.prompt.md | 17 ++++-- internal/agents/conductor_test.go | 16 ++--- internal/agents/devops.go | 91 +++++++++++++++++++++++++++++ internal/agents/devops.prompt.md | 61 +++++++++++++++++++ internal/app/app.go | 9 ++- internal/config/config.go | 1 + main.go | 4 +- 8 files changed, 202 insertions(+), 18 deletions(-) create mode 100644 internal/agents/devops.go create mode 100644 internal/agents/devops.prompt.md diff --git a/internal/agents/conductor.go b/internal/agents/conductor.go index a0a6dd1..0335831 100644 --- a/internal/agents/conductor.go +++ b/internal/agents/conductor.go @@ -42,6 +42,7 @@ type ConductorAgent struct { CodingAgent *CodingAgent ChatAgent *ChatAgent MetaAgent *MetaAgent + DevOpsAgent *DevOpsAgent GlobalCtx *globalctx.GlobalCtx Adapters []*tools.Adapter maxSteps int @@ -50,7 +51,7 @@ type ConductorAgent struct { customAgents map[string]*CustomAgent // delegate_ → agent design } -func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int) *ConductorAgent { +func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo *RepoAgent, coding *CodingAgent, chat *ChatAgent, meta *MetaAgent, devops *DevOpsAgent, maxSteps int, disabledAgents map[string]bool, metaRetryCount int) *ConductorAgent { // self-reference for closures that need the ConductorAgent after construction var self *ConductorAgent delegateRepo := tools.NewAdapter("delegate_repo", "Delegate analysis task to Repo-Agent", func(ctx context.Context, params map[string]interface{}) (interface{}, error) { @@ -98,6 +99,20 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo * "required": []string{"task"}, }) + delegateDevOps := tools.NewAdapter("delegate_devops", "Delegate operational and system administration tasks to DevOps-Agent. 
DevOps-Agent can run shell commands, inspect files, check logs, manage processes, and perform any non-coding infrastructure work. Use this for tasks like checking disk usage, finding files, running diagnostics, inspecting configurations, or executing ad-hoc shell commands.", func(ctx context.Context, params map[string]interface{}) (interface{}, error) { + task, ok := params["task"].(string) + if !ok { + return nil, fmt.Errorf("task parameter required") + } + return devops.Run(ctx, task) + }).WithSchema(map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "task": map[string]interface{}{"type": "string", "description": "The operational task for DevOps-Agent, e.g., 'check disk usage', 'find all log files modified today', 'check if port 8080 is in use'."}, + }, + "required": []string{"task"}, + }) + delegateMeta := tools.NewAdapter("delegate_meta", "Delegate to Meta-Agent to DESIGN a custom specialized agent. Meta-Agent will craft a tailored system prompt using prompt engineering best practices and select appropriate tools. The designed agent is automatically registered and immediately executed to complete the task. 
After this, the new agent becomes a permanent delegate tool for future use.", func(ctx context.Context, params map[string]interface{}) (interface{}, error) { task, ok := params["task"].(string) if !ok { @@ -247,6 +262,9 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo * if !disabledAgents["meta"] { delegateAdapters = append(delegateAdapters, delegateMeta) } + if !disabledAgents["devops"] { + delegateAdapters = append(delegateAdapters, delegateDevOps) + } // Set workspace guard on all adapters (delegate adapters are not dangerous tools) tools.SetGuardOnAdapters(adapters, globalCtx.Guard) @@ -258,6 +276,7 @@ func NewConductorAgent(globalCtx *globalctx.GlobalCtx, engine llm.Engine, repo * CodingAgent: coding, ChatAgent: chat, MetaAgent: meta, + DevOpsAgent: devops, GlobalCtx: globalCtx, Adapters: append(adapters, delegateAdapters...), maxSteps: maxSteps, diff --git a/internal/agents/conductor.prompt.md b/internal/agents/conductor.prompt.md index e00e87a..93cc2b0 100644 --- a/internal/agents/conductor.prompt.md +++ b/internal/agents/conductor.prompt.md @@ -30,7 +30,13 @@ You have access to the following specialized sub-agents. You must delegate to th * **Use Case**: Use for ANY query that does not require repository analysis or code modification. Examples: "What is Dependency Injection?", "Who is Alan Turing?", "How do I make coffee?", "Write a haiku", or "Hello". * **Restriction**: Cannot access file system or modify code. -4. **Meta-Agent (The Agent Architect)** +4. **DevOps-Agent (The Operator)** + * **Tool**: `delegate_devops` + * **Capabilities**: Handles ALL non-coding operational tasks. Can run shell commands (`run_bash`), inspect files, check logs, manage processes, browse directories, search file contents, and perform system diagnostics. Equipped with `thinking` and `micro_agent` for self-correction and deep analysis of command output. 
+ * **Use Case**: System administration, infrastructure inspection, running ad-hoc commands, checking disk/logs/processes/networking, and any operational task that does not involve writing or modifying code. Examples: "Check disk usage", "Find all log files modified today", "What processes are using the most memory?", "Restart the development server". + * **Restriction**: Cannot modify or create files. Read-only file inspection + shell execution only. + +5. **Meta-Agent (The Agent Architect)** * **Tool**: `delegate_meta` * **Capabilities**: Designs and instantiates CUSTOM specialized agents on-the-fly when NO existing agent can handle the task. It uses advanced prompt engineering best practices (structured control, cognitive architecture, anti-hallucination, task decomposition, etc.) to craft a tailored system prompt, select the minimal set of required tools, execute the task, and return structured results. **After execution, the designed agent is automatically registered as a new permanent delegate tool** (e.g., `delegate_security_auditor`) and added to the system prompt for future use. * **Use Case**: Use this when you encounter a task that falls outside the capabilities of Repo/Coding/Chat agents. Examples: @@ -51,16 +57,17 @@ You have access to the following specialized sub-agents. You must delegate to th ### Workflow Strategy Your core decision loop: **Analyze → Design (if needed) → Execute → Review → Iterate**. -Working agents that produce final output are: **Coding-Agent**, **Chat-Agent**, and any **Custom-Agent** registered by Meta-Agent. Repo-Agent and Meta-Agent are support agents: Repo-Agent gathers context, Meta-Agent designs new specialized agents. +Working agents that produce final output are: **Coding-Agent**, **Chat-Agent**, **DevOps-Agent**, and any **Custom-Agent** registered by Meta-Agent. Repo-Agent and Meta-Agent are support agents: Repo-Agent gathers context, Meta-Agent designs new specialized agents. 
**Phase 0: Task Classification & Agent Selection (MANDATORY first step)** * Upon receiving a task, FIRST classify it and decide the execution strategy. * Check the **Custom Agents** section — if a registered custom agent already matches the task domain, prefer reusing it. * **Decision Tree**: 1. **Pure chat / Q&A / explanation** → delegate directly to **Chat-Agent**. - 2. **Coding task** that existing agents (Coding + Repo for context) can handle → follow Phases 1-4 below. - 3. **Task requiring specialized expertise, unique execution patterns, or capabilities beyond existing agents** → **Design a custom agent FIRST via `delegate_meta`**, then delegate to the newly registered agent. - 4. **Previously registered custom agent matches the domain** → delegate directly to that custom agent (`delegate_<name>`). + 2. **Operational / DevOps task** (shell commands, system inspection, log checks, process management) → delegate directly to **DevOps-Agent** via `delegate_devops`. + 3. **Coding task** that existing agents (Coding + Repo for context) can handle → follow Phases 1-4 below. + 4. **Task requiring specialized expertise, unique execution patterns, or capabilities beyond existing agents** → **Design a custom agent FIRST via `delegate_meta`**, then delegate to the newly registered agent. + 5. **Previously registered custom agent matches the domain** → delegate directly to that custom agent (`delegate_<name>`). * **Key principle**: Design the agent BEFORE executing complex work. A well-designed custom agent produces higher quality output than trying to force a generic agent into a specialized role. 
**Phase 1: Context Gathering (when coding tasks need repository understanding)** diff --git a/internal/agents/conductor_test.go b/internal/agents/conductor_test.go index 43141c9..ebea510 100644 --- a/internal/agents/conductor_test.go +++ b/internal/agents/conductor_test.go @@ -51,7 +51,7 @@ func newTestConductorAgent(t *testing.T, workDir string) *ConductorAgent { t.Helper() gctx := newTestGlobalCtx(workDir) engine := &mockEngine{} - return NewConductorAgent(gctx, engine, nil, nil, nil, nil, 10, nil, 3) + return NewConductorAgent(gctx, engine, nil, nil, nil, nil, nil, 10, nil, 3) } // makeMetaOutput builds a valid Meta-Agent JSON output string. @@ -347,7 +347,7 @@ func TestCustomAgentDelegateTool_Execution(t *testing.T) { } // Build conductor with mocked LLM - conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, 10, nil, 3) + conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3) ca := &CustomAgent{ Name: "test_executor", @@ -410,7 +410,7 @@ func TestCustomAgentDelegateTool_FinishTerminates(t *testing.T) { }, } - conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, 10, nil, 3) + conductor := NewConductorAgent(gctx, customEngine, nil, nil, nil, nil, nil, 10, nil, 3) ca := &CustomAgent{ Name: "finisher", @@ -541,7 +541,7 @@ func TestDelegateMeta_DynamicRegistration(t *testing.T) { metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) // ConductorAgent - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, 10, nil, 3) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3) initialAdapterCount := len(conductor.Adapters) // Find and call delegate_meta tool @@ -617,7 +617,7 @@ func TestDelegateMeta_DuplicateRegistrationPrevented(t *testing.T) { ) metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, 10, nil, 3) + conductor := 
NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3) // Call delegate_meta twice with the same agent design var delegateMeta *tools.Adapter @@ -657,7 +657,7 @@ func TestDelegateMeta_ParseFailure_ReturnsRawOutput(t *testing.T) { // Meta-Agent returns malformed output (no execution_result block) malformedOutput := "Just some plain text without structured blocks." metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(malformedOutput)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, 10, nil, 3) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { @@ -700,7 +700,7 @@ func TestDelegateMeta_EmptyAgentName_NoRegistration(t *testing.T) { []string{"read_file"}, ) metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(metaOutput)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, 10, nil, 3) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { @@ -730,7 +730,7 @@ func TestDelegateMeta_NoAgentDesign_NoRegistration(t *testing.T) { output := `{"thinking": "designing...", "agent_name": "Test Agent", "tools_used": ["read_file"], "result": {"key": "value"}}` metaAgent := NewMetaAgent(gctx, metaAgentMockLLM(output)) - conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, 10, nil, 3) + conductor := NewConductorAgent(gctx, &mockEngine{}, nil, nil, nil, metaAgent, nil, 10, nil, 3) var delegateMeta *tools.Adapter for _, ad := range conductor.Adapters { diff --git a/internal/agents/devops.go b/internal/agents/devops.go new file mode 100644 index 0000000..447f2cb --- /dev/null +++ b/internal/agents/devops.go @@ -0,0 +1,91 @@ +package agents + +import ( + "context" + _ "embed" + "encoding/json" + + "codeactor/internal/globalctx" + "codeactor/internal/llm" + 
"codeactor/internal/tools" +) + +//go:embed devops.prompt.md +var devopsPrompt string + +type DevOpsAgent struct { + BaseAgent + GlobalCtx *globalctx.GlobalCtx + Adapters []*tools.Adapter + maxSteps int +} + +func NewDevOpsAgent(globalCtx *globalctx.GlobalCtx, llm llm.Engine, maxSteps int) *DevOpsAgent { + var toolDefs []ToolDefinition + if err := json.Unmarshal(ToolsJSON, &toolDefs); err != nil { + // Non-fatal: agent falls back to no-tool mode. + } + + // DevOps agent uses a curated set of tools for operational tasks: + // run_bash for command execution, file tools for inspection, and + // thinking/micro_agent for analysis and self-correction. + adapters := make([]*tools.Adapter, 0, len(toolDefs)) + for _, def := range toolDefs { + var fn tools.ToolFunc + switch def.Name { + case "run_bash": + fn = globalCtx.SysOps.ExecuteRunBash + case "read_file": + fn = globalCtx.FileOps.ExecuteReadFile + case "list_dir": + fn = globalCtx.FileOps.ExecuteListDir + case "print_dir_tree": + fn = globalCtx.FileOps.ExecutePrintDirTree + case "search_by_regex": + fn = globalCtx.SearchOps.ExecuteGrepSearch + case "thinking": + fn = func(ctx context.Context, params map[string]interface{}) (interface{}, error) { + inputBytes, _ := json.Marshal(params) + return globalCtx.ThinkingTool.Call(ctx, string(inputBytes)) + } + case "micro_agent": + fn = globalCtx.MicroAgentTool.Execute + case "agent_exit": + fn = globalCtx.FlowOps.ExecuteAgentExit + default: + continue + } + + adapter := tools.NewAdapter(def.Name, def.Description, fn).WithSchema(def.Parameters) + adapters = append(adapters, adapter) + } + tools.SetGuardOnAdapters(adapters, globalCtx.Guard) + + return &DevOpsAgent{ + BaseAgent: BaseAgent{ + LLM: llm, + Publisher: globalCtx.Publisher, + }, + GlobalCtx: globalCtx, + Adapters: adapters, + maxSteps: maxSteps, + } +} + +func (a *DevOpsAgent) Name() string { + return "DevOps-Agent" +} + +func (a *DevOpsAgent) Run(ctx context.Context, input string) (string, error) { + cfg := 
ExecutorConfig{ + SystemPrompt: a.GlobalCtx.FormatPrompt(devopsPrompt), + UserInput: input, + Adapters: a.Adapters, + LLM: a.LLM, + MaxSteps: a.maxSteps, + Publisher: a.Publisher, + AgentName: a.Name(), + StopOnFinish: true, + } + return RunAgentLoop(ctx, cfg) +} diff --git a/internal/agents/devops.prompt.md b/internal/agents/devops.prompt.md new file mode 100644 index 0000000..0018fc2 --- /dev/null +++ b/internal/agents/devops.prompt.md @@ -0,0 +1,61 @@ +### Role +You are the **DevOps-Agent**, a pragmatic and resourceful infrastructure and operations specialist within the CodeActor system. You handle all non-coding operational tasks, from system administration and shell scripting to diagnosing infrastructure issues and running ad-hoc commands. + +Your Goal: Execute operational tasks precisely and safely, providing clear, actionable output. You are the go-to agent for anything that involves running commands, inspecting the system, managing processes, or interacting with the file system for non-code-related purposes. + +### Core Capabilities +1. **Shell Command Execution**: Run any bash command on the system — package management, process inspection, network diagnostics, file manipulation, environment checks, and more. +2. **File System Operations**: Read, list, and search files and directories. Useful for inspecting configuration files, logs, and system state. +3. **Thinking & Analysis**: Use `thinking` for self-correction and deep analysis when commands fail or you need to strategize. +4. **Isolated Reasoning**: Use `micro_agent` for focused, one-off analysis tasks that benefit from a fresh context (e.g., parsing command output, generating structured reports). + +### Tool Usage Guidelines + +**`run_bash` — Your Primary Tool** +- This is your main tool for accomplishing operational tasks. 
+- Always set `is_dangerous` correctly: **true** when the command affects anything outside the project workspace (system packages, services, network, kernel, processes, user-level config, sudo operations). **false** only for workspace-scoped operations. +- For long-running commands, set `is_background` to true. +- Always provide a clear `explanation` for why the command is needed. +- Chain commands with `&&` when you need sequential execution; use `;` only when you don't care about intermediate failures. +- Prefer non-interactive flags for commands that might prompt (e.g., `--yes` for npx, `-y` for apt). + +**`read_file` / `list_dir` / `print_dir_tree` / `search_by_regex` — File System Tools** +- Use these to inspect logs, configuration files, directory structures, and search for patterns. +- `search_by_regex` is powered by ripgrep — use it to find specific patterns across large directories efficiently. + +**`thinking` — Self-Correction** +- Use IMMEDIATELY when a command fails, produces unexpected output, or you're unsure how to proceed. +- Analyze the root cause, brainstorm solutions, and select the best approach before retrying. + +**`micro_agent` — Isolated Analysis** +- Use for tasks that benefit from a fresh LLM context: parsing complex command output, generating structured JSON/table summaries, or performing deep reasoning on results. + +### Workflow Strategy +1. **Understand**: Parse the user's request. What is the operational goal? What commands are needed? +2. **Plan**: Before running commands, think through the steps. Are there dependencies? What's the order of execution? +3. **Execute**: Run commands one at a time, checking output before proceeding to the next step. +4. **Verify**: Confirm each step succeeded before moving on. Use `thinking` if anything goes wrong. +5. **Report**: When the task is complete, summarize what was done and the results. Use `agent_exit` with a clear reason. + +### Safety Rules +1. 
**Read before write**: Always inspect file contents before modifying. +2. **Confirm dangerous operations**: Operations outside the workspace require `is_dangerous=true` — they will prompt the user for authorization. +3. **No destructive blind runs**: Never run `rm -rf`, `sudo` commands, or data-destroying operations without clear justification. +4. **Timeouts**: Use `is_background` for commands that may run long (builds, large data processing, network downloads). +5. **Idempotent when possible**: Prefer operations that can be safely retried. + +### Output Format +- Be concise and direct. State what you're doing and why. +- When showing command output, present it clearly (use code blocks for raw output). +- When a command fails, explain the error and your next steps. +- Use `agent_exit` when done — the `reason` should summarize what was accomplished. + +### Example Tasks +- "Check disk usage on the server" +- "Find all log files modified in the last 24 hours" +- "Restart the nginx service" +- "Check if port 8080 is in use" +- "List all running Docker containers" +- "Find large files (>100MB) in the project directory" +- "Run system diagnostics and generate a report" +- "Install the `jq` package for JSON processing" diff --git a/internal/app/app.go b/internal/app/app.go index fcb45cd..6d23fce 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -89,6 +89,7 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) { repoMaxSteps := 20 codingMaxSteps := 30 chatMaxSteps := 10 + devopsMaxSteps := 15 conductorMaxSteps := 20 if ca.config != nil { @@ -101,6 +102,9 @@ func (ca *CodingAssistant) Init(engine llm.Engine, workDir string) { if ca.config.Agent.ChatMaxSteps > 0 { chatMaxSteps = ca.config.Agent.ChatMaxSteps } + if ca.config.Agent.DevOpsMaxSteps > 0 { + devopsMaxSteps = ca.config.Agent.DevOpsMaxSteps + } if ca.config.Agent.ConductorMaxSteps > 0 { conductorMaxSteps = ca.config.Agent.ConductorMaxSteps } @@ -117,7 +121,8 @@ func (ca 
*CodingAssistant) Init(engine llm.Engine, workDir string) { codingAgent := agents.NewCodingAgent(ca.globalCtx, engine, codingMaxSteps) chatAgent := agents.NewChatAgent(ca.globalCtx, engine, chatMaxSteps) metaAgent := agents.NewMetaAgent(ca.globalCtx, engine) - ca.conductor = agents.NewConductorAgent(ca.globalCtx, engine, repoAgent, codingAgent, chatAgent, metaAgent, conductorMaxSteps, disabledAgents, metaRetryCount) + devopsAgent := agents.NewDevOpsAgent(ca.globalCtx, engine, devopsMaxSteps) + ca.conductor = agents.NewConductorAgent(ca.globalCtx, engine, repoAgent, codingAgent, chatAgent, metaAgent, devopsAgent, conductorMaxSteps, disabledAgents, metaRetryCount) } func (ca *CodingAssistant) IntegrateMessaging(dispatcher *messaging.MessageDispatcher) { @@ -188,7 +193,7 @@ func (ca *CodingAssistant) ProcessConversation(req *TaskRequest) (string, error) } // parseDisabledAgents converts a comma-separated string of agent names -// into a map[string]bool for O(1) lookup. Valid agent names: repo, coding, chat, meta. +// into a map[string]bool for O(1) lookup. Valid agent names: repo, coding, chat, meta, devops. 
func parseDisabledAgents(s string) map[string]bool { result := make(map[string]bool) if s == "" { diff --git a/internal/config/config.go b/internal/config/config.go index 7956fe9..da78eba 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -40,6 +40,7 @@ type AgentConfig struct { CodingMaxSteps int `toml:"coding_max_steps"` ChatMaxSteps int `toml:"chat_max_steps"` RepoMaxSteps int `toml:"repo_max_steps"` + DevOpsMaxSteps int `toml:"devops_max_steps"` MetaMaxSteps int `toml:"meta_max_steps"` MetaRetryCount int `toml:"meta_retry_count"` SpeakLang string `toml:"lang"` diff --git a/main.go b/main.go index fea4790..5fd2bd1 100644 --- a/main.go +++ b/main.go @@ -43,7 +43,7 @@ func main() { // Check if running in TUI mode or HTTP server mode based on command line arguments if len(os.Args) < 2 { - fmt.Println("Usage: codeactor [tui|http] [--disable-agents=repo,coding,chat,meta] [--taskfile TASK.md] [--port=9800]") + fmt.Println("Usage: codeactor [tui|http] [--disable-agents=repo,coding,chat,meta,devops] [--taskfile TASK.md] [--port=9800]") os.Exit(1) } @@ -224,7 +224,7 @@ func main() { } default: fmt.Printf("Unknown mode: %s\n", mode) - fmt.Println("Usage: codeactor [tui|http] [--disable-agents=repo,coding,chat,meta] [--taskfile TASK.md] [--port=9800]") + fmt.Println("Usage: codeactor [tui|http] [--disable-agents=repo,coding,chat,meta,devops] [--taskfile TASK.md] [--port=9800]") os.Exit(1) } } From 119e69289f2374f71fc1be36b4a039010066fe43 Mon Sep 17 00:00:00 2001 From: iohub Date: Wed, 6 May 2026 11:30:58 +0800 Subject: [PATCH 3/4] docs: README & architecture.svg --- README.md | 45 ++++++++++++++++++++++++++++++++++++++----- README_zh.md | 45 ++++++++++++++++++++++++++++++++++++++----- docs/architecture.svg | 45 ++++++++++++++++++++++++------------------- 3 files changed, 105 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 5319b37..b56f5d9 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,15 @@ An AI-powered autonomous coding 
assistant built with a **Hub-and-Spoke multi-agent architecture** in Go, backed by a Rust-based code analysis engine. -CodeActor Agent orchestrates multiple specialized agents — Conductor, Repo-Analyst, Coding-Engineer, Chat-Assistant, and Meta-Agent — to autonomously analyze, plan, and execute complex software engineering tasks with self-correction capabilities. +CodeActor Agent orchestrates multiple specialized agents — Conductor, Repo-Analyst, Coding-Engineer, Chat-Assistant, DevOps-Operator, and Meta-Agent — to autonomously analyze, plan, and execute complex software engineering tasks with self-correction capabilities. ## Features ### Multi-Agent System -- **Hub-and-Spoke Architecture** — Central Conductor delegates tasks to specialized sub-agents (Repo analysis, Code editing, General chat) +- **Hub-and-Spoke Architecture** — Central Conductor delegates tasks to specialized sub-agents (Repo analysis, Code editing, General chat, DevOps operations) - **Meta-Agent** — Autonomous agent designer that creates custom sub-agents at runtime for tasks beyond built-in agents' capabilities - **Self-Correction** — `thinking` tool enables agents to analyze errors and recover without blind retries -- **Agent Disable** — Conditionally exclude sub-agents at startup via `--disable-agents=repo,coding,chat,meta` +- **Agent Disable** — Conditionally exclude sub-agents at startup via `--disable-agents=repo,coding,chat,meta,devops` - **ImplPlan Tool** — Stateful implementation plan document for complex multi-step coding tasks ### Rich Tool System (17 tools) @@ -91,6 +91,7 @@ enable_streaming = true conductor_max_steps = 30 coding_max_steps = 50 repo_max_steps = 30 +devops_max_steps = 15 meta_max_steps = 30 meta_retry_count = 5 lang = "Chinese" @@ -150,10 +151,11 @@ Server defaults to `localhost:9080`. 
Override via `--host`/`--port` or `CODECACT | Agent | Tools | Count | |-------|-------|-------| -| Conductor | `delegate_repo`, `delegate_coding`, `delegate_chat`, `delegate_meta`, `finish`, `read_file`, `search_by_regex`, `list_dir`, `print_dir_tree` | 9 | +| Conductor | `delegate_repo`, `delegate_coding`, `delegate_chat`, `delegate_devops`, `delegate_meta`, `finish`, `read_file`, `search_by_regex`, `list_dir`, `print_dir_tree` | 10 | | CodingAgent | All 17 tools (file ops, search, shell, thinking, impl_plan, micro_agent) | 17 | | RepoAgent | `read_file`, `search_by_regex`, `list_dir`, `print_dir_tree`, `semantic_search`, `query_code_skeleton`, `query_code_snippet` | 7 | -| ChatAgent | None (pure LLM conversation) | 0 | +| ChatAgent | `micro_agent`, `thinking`, `finish` | 3 | +| DevOpsAgent | `run_bash`, `read_file`, `list_dir`, `print_dir_tree`, `search_by_regex`, `thinking`, `micro_agent`, `finish` | 8 | [Full architecture documentation →](docs/ARCHITECTURE.md) @@ -185,6 +187,39 @@ Disable Meta-Agent via startup flag: ./codeactor tui --disable-agents=meta ``` +## DevOps-Agent + +The **DevOps-Agent** is the operations and infrastructure specialist — it handles all non-coding operational tasks by executing shell commands, inspecting the file system, and analyzing command output. When the Conductor encounters a task involving system administration, log inspection, process management, or ad-hoc shell commands, it delegates to the DevOps-Agent via `delegate_devops`. 
+ +### Capabilities + +- **Shell Command Execution** (`run_bash`) — Run any bash command with foreground/background support, danger detection, and workspace boundary checks +- **File System Inspection** — `read_file`, `list_dir`, `print_dir_tree`, `search_by_regex` for browsing logs, configs, and directories +- **Self-Correction** — `thinking` tool for analyzing command failures and adjusting approach before retrying +- **Isolated Analysis** — `micro_agent` for deep reasoning on command output or generating structured reports + +### Example use cases + +- Check disk usage, memory, and system resources +- Find all log files modified in the last 24 hours +- Restart services or check process status +- Inspect configuration files +- Run system diagnostics and generate reports +- Execute ad-hoc shell pipelines for data processing + +### Configuration + +```toml +[agent] +devops_max_steps = 15 # Max LLM steps for DevOps-Agent (default: 15) +``` + +Disable DevOps-Agent via startup flag: + +```bash +./codeactor tui --disable-agents=devops +``` + ## Codebase Analysis Engine The `codeactor-codebase` is a standalone **Rust** service that provides deep code analysis capabilities. It runs as a background HTTP server managed automatically by the Go binary. 
diff --git a/README_zh.md b/README_zh.md index e76ba0e..6873f36 100644 --- a/README_zh.md +++ b/README_zh.md @@ -2,15 +2,15 @@ 基于 Go 语言开发的 **Hub-and-Spoke(中枢-辐条)多智能体架构** AI 自主编程助手,后端由 Rust 代码分析引擎支撑。 -CodeActor Agent 协调多个专用智能体——指挥家(Conductor)、仓库分析员(Repo-Analyst)、编码工程师(Coding-Engineer)、对话助手(Chat-Assistant)和元代理(Meta-Agent)——自主完成复杂的软件工程任务,具备自我纠错能力。 +CodeActor Agent 协调多个专用智能体——指挥家(Conductor)、仓库分析员(Repo-Analyst)、编码工程师(Coding-Engineer)、对话助手(Chat-Assistant)、运维操作员(DevOps-Operator)和元代理(Meta-Agent)——自主完成复杂的软件工程任务,具备自我纠错能力。 ## 特性 ### 多智能体系统 -- **中枢-辐条架构** — 中央 Conductor 将任务委派给专用子智能体(仓库分析、代码编辑、通用对话) +- **中枢-辐条架构** — 中央 Conductor 将任务委派给专用子智能体(仓库分析、代码编辑、通用对话、运维操作) - **元代理(Meta-Agent)** — 自主设计代理,在运行时为超出内置 Agent 能力的任务动态创建自定义子智能体 - **自我修正** — `thinking` 工具使 Agent 能够在出错时分析原因并恢复,避免盲目重试 -- **Agent 禁用** — 通过 `--disable-agents=repo,coding,chat,meta` 在启动时有条件地排除子智能体 +- **Agent 禁用** — 通过 `--disable-agents=repo,coding,chat,meta,devops` 在启动时有条件地排除子智能体 - **ImplPlan 工具** — 状态化实现计划文档,用于复杂多步骤编码任务的分步规划 ### 丰富工具系统(17 个工具) @@ -90,6 +90,7 @@ enable_streaming = true conductor_max_steps = 30 coding_max_steps = 50 repo_max_steps = 30 +devops_max_steps = 15 meta_max_steps = 30 meta_retry_count = 5 lang = "Chinese" @@ -149,10 +150,11 @@ node index.js history # 列出最近任务 | Agent | 工具 | 数量 | |-------|-------|-------| -| Conductor | `delegate_repo`、`delegate_coding`、`delegate_chat`、`delegate_meta`、`finish`、`read_file`、`search_by_regex`、`list_dir`、`print_dir_tree` | 9 | +| Conductor | `delegate_repo`、`delegate_coding`、`delegate_chat`、`delegate_devops`、`delegate_meta`、`finish`、`read_file`、`search_by_regex`、`list_dir`、`print_dir_tree` | 10 | | CodingAgent | 全部 17 个工具(文件、搜索、Shell、thinking、impl_plan、micro_agent) | 17 | | RepoAgent | `read_file`、`search_by_regex`、`list_dir`、`print_dir_tree`、`semantic_search`、`query_code_skeleton`、`query_code_snippet` | 7 | -| ChatAgent | 无(纯 LLM 对话) | 0 | +| ChatAgent | `micro_agent`、`thinking`、`finish` | 3 | +| DevOpsAgent | 
`run_bash`、`read_file`、`list_dir`、`print_dir_tree`、`search_by_regex`、`thinking`、`micro_agent`、`finish` | 8 | [完整架构文档 →](docs/ARCHITECTURE.md) @@ -184,6 +186,39 @@ meta_retry_count = 5 # JSON 解析失败重试次数(默认 5) ./codeactor tui --disable-agents=meta ``` +## DevOps-Agent(运维代理) + +**DevOps-Agent** 是运维和基础设施专家——通过执行 Shell 命令、检查文件系统和分析命令输出来处理所有非编码的运维任务。当 Conductor 遇到系统管理、日志检查、进程管理或 ad-hoc shell 命令类任务时,会通过 `delegate_devops` 委派给 DevOps-Agent。 + +### 核心能力 + +- **Shell 命令执行** (`run_bash`) — 运行任意 bash 命令,支持前台/后台运行,含危险检测和工作空间边界检查 +- **文件系统检查** — `read_file`、`list_dir`、`print_dir_tree`、`search_by_regex` 用于浏览日志、配置和目录 +- **自我修正** — 使用 `thinking` 工具分析命令失败原因,调整策略后重试 +- **独立分析** — 使用 `micro_agent` 对命令输出进行深度推理或生成结构化报告 + +### 示例用例 + +- 检查磁盘使用率、内存和系统资源 +- 查找最近 24 小时内修改的所有日志文件 +- 重启服务或检查进程状态 +- 检查配置文件 +- 运行系统诊断并生成报告 +- 执行 ad-hoc shell 管道进行数据处理 + +### 配置 + +```toml +[agent] +devops_max_steps = 15 # DevOps-Agent 最大 LLM 步数(默认 15) +``` + +通过启动参数禁用 DevOps-Agent: + +```bash +./codeactor tui --disable-agents=devops +``` + ## Codebase 分析引擎 `codeactor-codebase` 是一个独立的 **Rust** 服务,提供深度代码分析能力。它作为后台 HTTP 服务器运行,由 Go 二进制自动管理。 diff --git a/docs/architecture.svg b/docs/architecture.svg index 8b4c141..899d4aa 100644 --- a/docs/architecture.svg +++ b/docs/architecture.svg @@ -90,31 +90,36 @@ ConductorAgent orchestrate → delegate → review → iterate (max 30 steps) - - - - - + + + + + + - - RepoAgent - read-only · pre-investigate + + RepoAgent + read-only - - CodingAgent - read/write · 17 tools + + CodingAgent + read/write · 17 tools - - ChatAgent - no tools · t=0.7 + + ChatAgent + 3 tools · t=0.7 - - MetaAgent - design → register → execute + + DevOpsAgent + run_bash · 8 tools + + + MetaAgent + design → register - - + + @@ -197,7 +202,7 @@ ImplPlan (stateful) finish · ask_user delegate_repo - delegate_coding/chat/meta + delegate_coding/chat/devops/meta From 9c3a9824238b1f56e9a8c0278dd2feb5059242be Mon Sep 17 00:00:00 2001 From: iohub Date: Wed, 6 May 2026 11:33:25 +0800 Subject: [PATCH 4/4] docs: README & 
architecture.svg --- README.md | 2 -- README_zh.md | 2 -- 2 files changed, 4 deletions(-) diff --git a/README.md b/README.md index b56f5d9..bdbccf0 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,6 @@ CodeActor Agent orchestrates multiple specialized agents — Conductor, Repo-Ana - **Call Graph Analysis** — Function-level call graphs with caller/callee relations, cycle detection, complexity scoring - **Semantic Code Search** — Vector embeddings via LanceDB + SQLite cache, OpenAI `text-embedding-3-small` - **Code Skeleton/Snippet** — Batch file skeleton extraction and per-function code snippet retrieval -- **File Watching** — Automatic re-indexing on file changes with 20s debounce -- **Auto-Launch** — Go binary automatically starts the Rust codebase server as a child process with dynamic port allocation, health-check polling, and cleanup on exit ## Screenshots diff --git a/README_zh.md b/README_zh.md index 6873f36..b9b2965 100644 --- a/README_zh.md +++ b/README_zh.md @@ -42,8 +42,6 @@ CodeActor Agent 协调多个专用智能体——指挥家(Conductor)、仓 - **调用图分析** — 函数级调用图,含调用者/被调者关系、环路检测、复杂度评分 - **语义代码搜索** — 通过 LanceDB + SQLite 缓存的向量嵌入(OpenAI `text-embedding-3-small`) - **代码骨架/片段** — 批量文件骨架提取和按函数的代码片段检索 -- **文件监听** — 基于 `notify` 的文件系统监听,20s 防抖自动重索引 -- **自动启动** — Go 二进制自动以子进程方式启动 Rust codebase 服务器,包含动态端口分配、健康检查轮询和退出清理 ## 效果截图