From c8ca3270fc7dd8ad027b3321e676060ae669a6f7 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Mon, 27 Apr 2026 02:05:21 +0200 Subject: [PATCH] refactor: Move computer_use to a separate config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #440. Mirrors the channels.yaml extraction (#444) — `computer_use` now lives in `.infer/computer_use.yaml` with project→userspace fallback. `infer init` seeds the file (with one-shot legacy migration from a `computer_use:` block in config.yaml), and the path is added to `tools.sandbox.protected_paths` defaults. `INFER_COMPUTER_USE_*` env vars keep working via `applyComputerUseEnvOverrides`. --- .infer/computer_use.yaml | 40 +++ .infer/config.yaml | 41 +-- cmd/config.go | 96 +++++++ cmd/init.go | 43 +++- cmd/init_test.go | 2 +- config/computer_use.go | 72 ++++++ config/computer_use_test.go | 281 +++++++++++++++++++++ config/config.go | 54 +--- internal/agent/tools/keyboard_type_test.go | 4 +- 9 files changed, 536 insertions(+), 97 deletions(-) create mode 100644 .infer/computer_use.yaml create mode 100644 config/computer_use.go create mode 100644 config/computer_use_test.go diff --git a/.infer/computer_use.yaml b/.infer/computer_use.yaml new file mode 100644 index 00000000..d6ab39a3 --- /dev/null +++ b/.infer/computer_use.yaml @@ -0,0 +1,40 @@ +--- +enabled: false +floating_window: + enabled: true + respawn_on_close: true + position: top-right + always_on_top: true +screenshot: + enabled: true + max_width: 1920 + max_height: 1080 + target_width: 1024 + target_height: 768 + format: jpeg + quality: 85 + streaming_enabled: true + capture_interval: 3 + buffer_size: 5 + temp_dir: "" + log_captures: false + show_overlay: true +rate_limit: + enabled: true + max_actions_per_minute: 60 + window_seconds: 60 +tools: + mouse_move: + enabled: true + mouse_click: + enabled: true + mouse_scroll: + enabled: true + keyboard_type: + enabled: true + max_text_length: 1000 + typing_delay_ms: 100 + 
get_focused_app: + enabled: true + activate_app: + enabled: true diff --git a/.infer/config.yaml b/.infer/config.yaml index acb40288..01540581 100644 --- a/.infer/config.yaml +++ b/.infer/config.yaml @@ -60,6 +60,7 @@ tools: - .infer/keybindings.yaml - .infer/prompts.yaml - .infer/channels.yaml + - .infer/computer_use.yaml - .git/ - '*.env' bash: @@ -273,43 +274,3 @@ web: install_version: latest install_dir: ~/.local/bin servers: [] -computer_use: - enabled: false - floating_window: - enabled: true - respawn_on_close: true - position: top-right - always_on_top: true - screenshot: - enabled: true - max_width: 1920 - max_height: 1080 - target_width: 1024 - target_height: 768 - format: jpeg - quality: 85 - streaming_enabled: true - capture_interval: 3 - buffer_size: 5 - temp_dir: "" - log_captures: false - show_overlay: true - rate_limit: - enabled: true - max_actions_per_minute: 60 - window_seconds: 60 - tools: - mouse_move: - enabled: true - mouse_click: - enabled: true - mouse_scroll: - enabled: true - keyboard_type: - enabled: true - max_text_length: 1000 - typing_delay_ms: 100 - get_focused_app: - enabled: true - activate_app: - enabled: true diff --git a/cmd/config.go b/cmd/config.go index 9917c646..fb46f1b9 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -606,6 +606,27 @@ func getEffectiveChannelsConfigPath() string { return config.DefaultChannelsPath } +// getEffectiveComputerUseConfigPath returns the path to the computer_use config file +// Searches in this order: 1) project .infer/computer_use.yaml, 2) user home ~/.infer/computer_use.yaml +func getEffectiveComputerUseConfigPath() string { + searchPaths := []string{ + config.DefaultComputerUsePath, + } + + if homeDir, err := os.UserHomeDir(); err == nil { + homePath := filepath.Join(homeDir, config.ConfigDirName, config.ComputerUseFileName) + searchPaths = append(searchPaths, homePath) + } + + for _, path := range searchPaths { + if _, err := os.Stat(path); err == nil { + return path + } + } + + return 
config.DefaultComputerUsePath +} + // getEffectivePromptsConfigPath returns the path to the prompts config file // Searches in this order: 1) project .infer/prompts.yaml, 2) user home ~/.infer/prompts.yaml func getEffectivePromptsConfigPath() string { @@ -688,6 +709,15 @@ func loadConfigFromViper() (*config.Config, error) { cfg.Channels = *channelsCfg applyChannelsEnvOverrides(cfg) + cuPath := getEffectiveComputerUseConfigPath() + cuCfg, err := config.LoadComputerUse(cuPath) + if err != nil { + logger.Warn("Failed to load computer_use config, using defaults", "error", err, "path", cuPath) + cuCfg = config.DefaultComputerUseConfig() + } + cfg.ComputerUse = *cuCfg + applyComputerUseEnvOverrides(cfg) + return cfg, nil } @@ -867,6 +897,72 @@ func applyChannelsEnvOverrides(cfg *config.Config) { setStringSlice("INFER_CHANNELS_WHATSAPP_ALLOWED_USERS", &cfg.Channels.WhatsApp.AllowedUsers) } +// applyComputerUseEnvOverrides applies INFER_COMPUTER_USE_* env vars onto +// the in-memory computer_use config. Run AFTER LoadComputerUse so envs win +// over computer_use.yaml. The computer_use config now lives in its own +// file (yaml:"-" mapstructure:"-" on Config.ComputerUse), so viper does not +// bind these env vars itself - this function is the single source of +// env-var support. Mirrors applyChannelsEnvOverrides. Bool/int values that fail to parse are ignored, leaving the loaded value unchanged. 
+func applyComputerUseEnvOverrides(cfg *config.Config) { + setBool := func(env string, target *bool) { + val, ok := os.LookupEnv(env) + if !ok { + return + } + if b, err := strconv.ParseBool(strings.TrimSpace(val)); err == nil { + *target = b + } + } + setInt := func(env string, target *int) { + val, ok := os.LookupEnv(env) + if !ok { + return + } + if n, err := strconv.Atoi(strings.TrimSpace(val)); err == nil { + *target = n + } + } + setString := func(env string, target *string) { + if val, ok := os.LookupEnv(env); ok { + *target = val + } + } + + setBool("INFER_COMPUTER_USE_ENABLED", &cfg.ComputerUse.Enabled) + + setBool("INFER_COMPUTER_USE_FLOATING_WINDOW_ENABLED", &cfg.ComputerUse.FloatingWindow.Enabled) + setBool("INFER_COMPUTER_USE_FLOATING_WINDOW_RESPAWN_ON_CLOSE", &cfg.ComputerUse.FloatingWindow.RespawnOnClose) + setString("INFER_COMPUTER_USE_FLOATING_WINDOW_POSITION", &cfg.ComputerUse.FloatingWindow.Position) + setBool("INFER_COMPUTER_USE_FLOATING_WINDOW_ALWAYS_ON_TOP", &cfg.ComputerUse.FloatingWindow.AlwaysOnTop) + + setBool("INFER_COMPUTER_USE_SCREENSHOT_ENABLED", &cfg.ComputerUse.Screenshot.Enabled) + setInt("INFER_COMPUTER_USE_SCREENSHOT_MAX_WIDTH", &cfg.ComputerUse.Screenshot.MaxWidth) + setInt("INFER_COMPUTER_USE_SCREENSHOT_MAX_HEIGHT", &cfg.ComputerUse.Screenshot.MaxHeight) + setInt("INFER_COMPUTER_USE_SCREENSHOT_TARGET_WIDTH", &cfg.ComputerUse.Screenshot.TargetWidth) + setInt("INFER_COMPUTER_USE_SCREENSHOT_TARGET_HEIGHT", &cfg.ComputerUse.Screenshot.TargetHeight) + setString("INFER_COMPUTER_USE_SCREENSHOT_FORMAT", &cfg.ComputerUse.Screenshot.Format) + setInt("INFER_COMPUTER_USE_SCREENSHOT_QUALITY", &cfg.ComputerUse.Screenshot.Quality) + setBool("INFER_COMPUTER_USE_SCREENSHOT_STREAMING_ENABLED", &cfg.ComputerUse.Screenshot.StreamingEnabled) + setInt("INFER_COMPUTER_USE_SCREENSHOT_CAPTURE_INTERVAL", &cfg.ComputerUse.Screenshot.CaptureInterval) + setInt("INFER_COMPUTER_USE_SCREENSHOT_BUFFER_SIZE", &cfg.ComputerUse.Screenshot.BufferSize) + 
setString("INFER_COMPUTER_USE_SCREENSHOT_TEMP_DIR", &cfg.ComputerUse.Screenshot.TempDir) + setBool("INFER_COMPUTER_USE_SCREENSHOT_LOG_CAPTURES", &cfg.ComputerUse.Screenshot.LogCaptures) + setBool("INFER_COMPUTER_USE_SCREENSHOT_SHOW_OVERLAY", &cfg.ComputerUse.Screenshot.ShowOverlay) + + setBool("INFER_COMPUTER_USE_RATE_LIMIT_ENABLED", &cfg.ComputerUse.RateLimit.Enabled) + setInt("INFER_COMPUTER_USE_RATE_LIMIT_MAX_ACTIONS_PER_MINUTE", &cfg.ComputerUse.RateLimit.MaxActionsPerMinute) + setInt("INFER_COMPUTER_USE_RATE_LIMIT_WINDOW_SECONDS", &cfg.ComputerUse.RateLimit.WindowSeconds) + + setBool("INFER_COMPUTER_USE_TOOLS_MOUSE_MOVE_ENABLED", &cfg.ComputerUse.Tools.MouseMove.Enabled) + setBool("INFER_COMPUTER_USE_TOOLS_MOUSE_CLICK_ENABLED", &cfg.ComputerUse.Tools.MouseClick.Enabled) + setBool("INFER_COMPUTER_USE_TOOLS_MOUSE_SCROLL_ENABLED", &cfg.ComputerUse.Tools.MouseScroll.Enabled) + setBool("INFER_COMPUTER_USE_TOOLS_KEYBOARD_TYPE_ENABLED", &cfg.ComputerUse.Tools.KeyboardType.Enabled) + setInt("INFER_COMPUTER_USE_TOOLS_KEYBOARD_TYPE_MAX_TEXT_LENGTH", &cfg.ComputerUse.Tools.KeyboardType.MaxTextLength) + setInt("INFER_COMPUTER_USE_TOOLS_KEYBOARD_TYPE_TYPING_DELAY_MS", &cfg.ComputerUse.Tools.KeyboardType.TypingDelayMs) + setBool("INFER_COMPUTER_USE_TOOLS_GET_FOCUSED_APP_ENABLED", &cfg.ComputerUse.Tools.GetFocusedApp.Enabled) + setBool("INFER_COMPUTER_USE_TOOLS_ACTIVATE_APP_ENABLED", &cfg.ComputerUse.Tools.ActivateApp.Enabled) +} + // GetUserspaceFlag checks for --userspace flag on the current command or parent commands func GetUserspaceFlag(cmd *cobra.Command) bool { if userspace, err := cmd.Flags().GetBool("userspace"); err == nil && userspace { diff --git a/cmd/init.go b/cmd/init.go index 13a5b6a3..3df20f4c 100644 --- a/cmd/init.go +++ b/cmd/init.go @@ -39,7 +39,7 @@ func initializeProject(cmd *cobra.Command) error { //nolint:funlen userspace, _ := cmd.Flags().GetBool("userspace") skipMigrations, _ := cmd.Flags().GetBool("skip-migrations") - var configPath, gitignorePath, 
scmShortcutsPath, gitShortcutsPath, mcpShortcutsPath, shellsShortcutsPath, exportShortcutsPath, a2aShortcutsPath, mcpPath, keybindingsPath, promptsPath, channelsPath, agentsPath string + var configPath, gitignorePath, scmShortcutsPath, gitShortcutsPath, mcpShortcutsPath, shellsShortcutsPath, exportShortcutsPath, a2aShortcutsPath, mcpPath, keybindingsPath, promptsPath, channelsPath, computerUsePath, agentsPath string if userspace { homeDir, err := os.UserHomeDir() @@ -58,6 +58,7 @@ func initializeProject(cmd *cobra.Command) error { //nolint:funlen keybindingsPath = filepath.Join(homeDir, config.ConfigDirName, config.KeybindingsFileName) promptsPath = filepath.Join(homeDir, config.ConfigDirName, config.PromptsFileName) channelsPath = filepath.Join(homeDir, config.ConfigDirName, config.ChannelsFileName) + computerUsePath = filepath.Join(homeDir, config.ConfigDirName, config.ComputerUseFileName) agentsPath = filepath.Join(homeDir, config.ConfigDirName, config.AgentsFileName) } else { configPath = config.DefaultConfigPath @@ -72,11 +73,12 @@ func initializeProject(cmd *cobra.Command) error { //nolint:funlen keybindingsPath = config.DefaultKeybindingsPath promptsPath = config.DefaultPromptsPath channelsPath = config.DefaultChannelsPath + computerUsePath = config.DefaultComputerUsePath agentsPath = config.DefaultAgentsPath } if !overwrite { - if err := validateFilesNotExist(configPath, gitignorePath, scmShortcutsPath, gitShortcutsPath, mcpShortcutsPath, shellsShortcutsPath, exportShortcutsPath, a2aShortcutsPath, mcpPath, keybindingsPath, promptsPath, channelsPath, agentsPath); err != nil { + if err := validateFilesNotExist(configPath, gitignorePath, scmShortcutsPath, gitShortcutsPath, mcpShortcutsPath, shellsShortcutsPath, exportShortcutsPath, a2aShortcutsPath, mcpPath, keybindingsPath, promptsPath, channelsPath, computerUsePath, agentsPath); err != nil { return err } } @@ -140,6 +142,11 @@ tmp/ return fmt.Errorf("failed to create channels config file: %w", err) } + 
cuMigrated, err := createComputerUseConfigFile(computerUsePath) + if err != nil { + return fmt.Errorf("failed to create computer_use config file: %w", err) + } + if err := createAgentsConfigFile(agentsPath); err != nil { return fmt.Errorf("failed to create agents config file: %w", err) } @@ -164,11 +171,16 @@ tmp/ fmt.Printf(" Created: %s\n", keybindingsPath) fmt.Printf(" Created: %s\n", promptsPath) fmt.Printf(" Created: %s\n", channelsPath) + fmt.Printf(" Created: %s\n", computerUsePath) fmt.Printf(" Created: %s\n", agentsPath) if migrated { fmt.Printf("\n%s Migrated legacy `channels:` block from config.yaml into %s.\n", icons.CheckMarkStyle.Render(icons.CheckMark), channelsPath) fmt.Printf(" You can now remove the `channels:` block from %s.\n", configPath) } + if cuMigrated { + fmt.Printf("\n%s Migrated legacy `computer_use:` block from config.yaml into %s.\n", icons.CheckMarkStyle.Render(icons.CheckMark), computerUsePath) + fmt.Printf(" You can now remove the `computer_use:` block from %s.\n", configPath) + } fmt.Println("") if userspace { fmt.Println("This userspace configuration will be used as a fallback for all projects.") @@ -449,6 +461,33 @@ func createChannelsConfigFile(path string) (bool, error) { return migrated, nil } +// createComputerUseConfigFile writes a fresh computer_use.yaml. Returns +// true when the file was seeded from a legacy `computer_use:` block found +// in viper (i.e. migrated from config.yaml) rather than from in-code +// defaults. Migration only runs when no computer_use.yaml exists yet, so +// it is safe to re-run init. 
+func createComputerUseConfigFile(path string) (bool, error) { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return false, fmt.Errorf("failed to create config directory: %w", err) + } + + cuCfg := config.DefaultComputerUseConfig() + migrated := false + + if _, err := os.Stat(path); os.IsNotExist(err) && V != nil && V.IsSet("computer_use") { + legacy := config.DefaultComputerUseConfig() + if err := V.UnmarshalKey("computer_use", legacy); err == nil { + cuCfg = legacy + migrated = true + } + } + + if err := config.SaveComputerUse(path, cuCfg); err != nil { + return false, err + } + return migrated, nil +} + // createAgentsConfigFile writes a fresh agents.yaml seeded from the in-code // defaults so users can manage A2A agents via `infer agents` commands. func createAgentsConfigFile(path string) error { diff --git a/cmd/init_test.go b/cmd/init_test.go index 3042b1f1..cfb99598 100644 --- a/cmd/init_test.go +++ b/cmd/init_test.go @@ -27,7 +27,7 @@ func TestInitializeProject(t *testing.T) { "userspace": false, "skip-migrations": true, }, - wantFiles: []string{".infer/config.yaml", ".infer/.gitignore"}, + wantFiles: []string{".infer/config.yaml", ".infer/.gitignore", ".infer/computer_use.yaml"}, wantNoFiles: []string{"AGENTS.md"}, wantErr: false, }, diff --git a/config/computer_use.go b/config/computer_use.go new file mode 100644 index 00000000..2ac7061f --- /dev/null +++ b/config/computer_use.go @@ -0,0 +1,72 @@ +package config + +import ( + utils "github.com/inference-gateway/cli/config/utils" +) + +const ( + ComputerUseFileName = "computer_use.yaml" + DefaultComputerUsePath = ConfigDirName + "/" + ComputerUseFileName +) + +// DefaultComputerUseConfig returns the in-code default computer_use +// configuration used when no computer_use.yaml file exists. `infer init` +// seeds the file from this and the runtime falls back to it when the file +// is absent. 
+func DefaultComputerUseConfig() *ComputerUseConfig { + return &ComputerUseConfig{ + Enabled: false, + FloatingWindow: FloatingWindowConfig{ + Enabled: true, + RespawnOnClose: true, + Position: "top-right", + AlwaysOnTop: true, + }, + Screenshot: ScreenshotToolConfig{ + Enabled: true, + MaxWidth: 1920, + MaxHeight: 1080, + TargetWidth: 1024, + TargetHeight: 768, + Format: "jpeg", + Quality: 85, + StreamingEnabled: true, + CaptureInterval: 3, + BufferSize: 5, + TempDir: "", + LogCaptures: false, + ShowOverlay: true, + }, + RateLimit: RateLimitConfig{ + Enabled: true, + MaxActionsPerMinute: 60, + WindowSeconds: 60, + }, + Tools: ComputerUseToolsConfig{ + MouseMove: MouseMoveToolConfig{Enabled: true}, + MouseClick: MouseClickToolConfig{Enabled: true}, + MouseScroll: MouseScrollToolConfig{Enabled: true}, + KeyboardType: KeyboardTypeToolConfig{ + Enabled: true, + MaxTextLength: 1000, + TypingDelayMs: 100, + }, + GetFocusedApp: GetFocusedAppToolConfig{Enabled: true}, + ActivateApp: ActivateAppToolConfig{Enabled: true}, + }, + } +} + +// LoadComputerUse reads computer_use.yaml from disk. When the file is +// missing it returns the in-code defaults so callers can treat absence as +// "use defaults" without special-casing. The file body is run through +// os.ExpandEnv so `${VAR}`-style references resolve from the environment. +func LoadComputerUse(path string) (*ComputerUseConfig, error) { + return utils.LoadYAML(path, "computer_use", DefaultComputerUseConfig) +} + +// SaveComputerUse writes the computer_use configuration to disk, creating +// any missing parent directories. 
+func SaveComputerUse(path string, cfg *ComputerUseConfig) error { + return utils.SaveYAML(path, "computer_use", cfg) +} diff --git a/config/computer_use_test.go b/config/computer_use_test.go new file mode 100644 index 00000000..edc06775 --- /dev/null +++ b/config/computer_use_test.go @@ -0,0 +1,281 @@ +package config_test + +import ( + "os" + "path/filepath" + "testing" + + config "github.com/inference-gateway/cli/config" +) + +func TestComputerUseConstants(t *testing.T) { + if config.ComputerUseFileName != "computer_use.yaml" { + t.Errorf("Expected ComputerUseFileName 'computer_use.yaml', got %q", config.ComputerUseFileName) + } + expectedPath := config.ConfigDirName + "/" + config.ComputerUseFileName + if config.DefaultComputerUsePath != expectedPath { + t.Errorf("Expected DefaultComputerUsePath %q, got %q", expectedPath, config.DefaultComputerUsePath) + } +} + +func TestDefaultComputerUseConfig(t *testing.T) { + cfg := config.DefaultComputerUseConfig() + if cfg == nil { + t.Fatal("DefaultComputerUseConfig() returned nil") + } + if cfg.Enabled { + t.Error("Expected Enabled to be false by default") + } + if !cfg.FloatingWindow.Enabled { + t.Error("Expected FloatingWindow.Enabled to be true by default") + } + if cfg.FloatingWindow.Position != "top-right" { + t.Errorf("Expected FloatingWindow.Position 'top-right', got %q", cfg.FloatingWindow.Position) + } + if cfg.Screenshot.MaxWidth != 1920 { + t.Errorf("Expected Screenshot.MaxWidth=1920, got %d", cfg.Screenshot.MaxWidth) + } + if cfg.Screenshot.MaxHeight != 1080 { + t.Errorf("Expected Screenshot.MaxHeight=1080, got %d", cfg.Screenshot.MaxHeight) + } + if cfg.Screenshot.Format != "jpeg" { + t.Errorf("Expected Screenshot.Format 'jpeg', got %q", cfg.Screenshot.Format) + } + if cfg.Screenshot.Quality != 85 { + t.Errorf("Expected Screenshot.Quality=85, got %d", cfg.Screenshot.Quality) + } + if !cfg.Screenshot.StreamingEnabled { + t.Error("Expected Screenshot.StreamingEnabled true") + } + if 
cfg.RateLimit.MaxActionsPerMinute != 60 { + t.Errorf("Expected RateLimit.MaxActionsPerMinute=60, got %d", cfg.RateLimit.MaxActionsPerMinute) + } + if cfg.RateLimit.WindowSeconds != 60 { + t.Errorf("Expected RateLimit.WindowSeconds=60, got %d", cfg.RateLimit.WindowSeconds) + } + if cfg.Tools.KeyboardType.MaxTextLength != 1000 { + t.Errorf("Expected Tools.KeyboardType.MaxTextLength=1000, got %d", cfg.Tools.KeyboardType.MaxTextLength) + } + if cfg.Tools.KeyboardType.TypingDelayMs != 100 { + t.Errorf("Expected Tools.KeyboardType.TypingDelayMs=100, got %d", cfg.Tools.KeyboardType.TypingDelayMs) + } + if !cfg.Tools.MouseMove.Enabled { + t.Error("Expected Tools.MouseMove.Enabled true") + } + if !cfg.Tools.GetFocusedApp.Enabled { + t.Error("Expected Tools.GetFocusedApp.Enabled true") + } +} + +func TestLoadComputerUse_NonExistentFile(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "non-existent.yaml") + + cfg, err := config.LoadComputerUse(path) + if err != nil { + t.Fatalf("LoadComputerUse() should not error for missing file, got: %v", err) + } + if cfg == nil { + t.Fatal("LoadComputerUse() returned nil") + } + defaults := config.DefaultComputerUseConfig() + if cfg.Enabled != defaults.Enabled || cfg.Screenshot.MaxWidth != defaults.Screenshot.MaxWidth { + t.Errorf("Expected defaults, got %+v", cfg) + } +} + +func TestLoadComputerUse_ValidYAML(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "computer_use.yaml") + + yamlContent := `--- +enabled: true +floating_window: + enabled: false + position: bottom-left + always_on_top: false + respawn_on_close: false +screenshot: + enabled: true + max_width: 800 + max_height: 600 + target_width: 640 + target_height: 480 + format: png + quality: 100 + streaming_enabled: false + capture_interval: 5 + buffer_size: 2 + temp_dir: /tmp/cu + log_captures: true + show_overlay: false +rate_limit: + enabled: false + max_actions_per_minute: 30 + window_seconds: 30 +tools: + mouse_move: + 
enabled: false + mouse_click: + enabled: false + mouse_scroll: + enabled: false + keyboard_type: + enabled: true + max_text_length: 500 + typing_delay_ms: 50 + get_focused_app: + enabled: false + activate_app: + enabled: false +` + if err := os.WriteFile(path, []byte(yamlContent), 0644); err != nil { + t.Fatalf("Failed to write yaml: %v", err) + } + + cfg, err := config.LoadComputerUse(path) + if err != nil { + t.Fatalf("LoadComputerUse() failed: %v", err) + } + if !cfg.Enabled { + t.Error("Expected Enabled true") + } + if cfg.FloatingWindow.Position != "bottom-left" { + t.Errorf("Expected FloatingWindow.Position 'bottom-left', got %q", cfg.FloatingWindow.Position) + } + if cfg.Screenshot.MaxWidth != 800 { + t.Errorf("Expected Screenshot.MaxWidth=800, got %d", cfg.Screenshot.MaxWidth) + } + if cfg.Screenshot.Format != "png" { + t.Errorf("Expected Screenshot.Format 'png', got %q", cfg.Screenshot.Format) + } + if cfg.RateLimit.Enabled { + t.Error("Expected RateLimit.Enabled false") + } + if cfg.RateLimit.MaxActionsPerMinute != 30 { + t.Errorf("Expected RateLimit.MaxActionsPerMinute=30, got %d", cfg.RateLimit.MaxActionsPerMinute) + } + if cfg.Tools.MouseMove.Enabled { + t.Error("Expected Tools.MouseMove.Enabled false") + } + if cfg.Tools.KeyboardType.MaxTextLength != 500 { + t.Errorf("Expected Tools.KeyboardType.MaxTextLength=500, got %d", cfg.Tools.KeyboardType.MaxTextLength) + } +} + +func TestLoadComputerUse_EnvironmentVariableExpansion(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "computer_use.yaml") + + t.Setenv("TEST_CU_TEMP_DIR", "/var/tmp/expanded") + + yamlContent := `--- +enabled: true +screenshot: + temp_dir: "${TEST_CU_TEMP_DIR}" +` + if err := os.WriteFile(path, []byte(yamlContent), 0644); err != nil { + t.Fatalf("Failed to write yaml: %v", err) + } + + cfg, err := config.LoadComputerUse(path) + if err != nil { + t.Fatalf("LoadComputerUse() failed: %v", err) + } + if cfg.Screenshot.TempDir != "/var/tmp/expanded" { + 
t.Errorf("Expected expanded temp_dir '/var/tmp/expanded', got %q", cfg.Screenshot.TempDir) + } +} + +func TestLoadComputerUse_InvalidYAML(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "computer_use.yaml") + if err := os.WriteFile(path, []byte("not: valid: yaml: ["), 0644); err != nil { + t.Fatalf("Failed to write yaml: %v", err) + } + + if _, err := config.LoadComputerUse(path); err == nil { + t.Fatal("Expected error from invalid YAML, got nil") + } +} + +func TestSaveComputerUse_RoundTrip(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "subdir", "computer_use.yaml") + + cfg := &config.ComputerUseConfig{ + Enabled: true, + FloatingWindow: config.FloatingWindowConfig{ + Enabled: false, + RespawnOnClose: false, + Position: "top-left", + AlwaysOnTop: false, + }, + Screenshot: config.ScreenshotToolConfig{ + Enabled: true, + MaxWidth: 1024, + MaxHeight: 768, + TargetWidth: 512, + TargetHeight: 384, + Format: "png", + Quality: 90, + StreamingEnabled: false, + CaptureInterval: 10, + BufferSize: 3, + TempDir: "/tmp/cu", + LogCaptures: true, + ShowOverlay: false, + }, + RateLimit: config.RateLimitConfig{ + Enabled: false, + MaxActionsPerMinute: 90, + WindowSeconds: 45, + }, + Tools: config.ComputerUseToolsConfig{ + MouseMove: config.MouseMoveToolConfig{Enabled: false}, + MouseClick: config.MouseClickToolConfig{Enabled: true}, + MouseScroll: config.MouseScrollToolConfig{Enabled: false}, + KeyboardType: config.KeyboardTypeToolConfig{ + Enabled: true, + MaxTextLength: 250, + TypingDelayMs: 75, + }, + GetFocusedApp: config.GetFocusedAppToolConfig{Enabled: true}, + ActivateApp: config.ActivateAppToolConfig{Enabled: false}, + }, + } + + if err := config.SaveComputerUse(path, cfg); err != nil { + t.Fatalf("SaveComputerUse() failed: %v", err) + } + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Fatal("File was not created") + } + + loaded, err := config.LoadComputerUse(path) + if err != nil { + t.Fatalf("LoadComputerUse() 
failed: %v", err) + } + if loaded.Enabled != cfg.Enabled || + loaded.FloatingWindow.Position != cfg.FloatingWindow.Position || + loaded.Screenshot.MaxWidth != cfg.Screenshot.MaxWidth || + loaded.Screenshot.Format != cfg.Screenshot.Format || + loaded.RateLimit.MaxActionsPerMinute != cfg.RateLimit.MaxActionsPerMinute { + t.Errorf("Round-trip mismatch: got %+v", loaded) + } + if loaded.Tools.KeyboardType.MaxTextLength != cfg.Tools.KeyboardType.MaxTextLength || + loaded.Tools.KeyboardType.TypingDelayMs != cfg.Tools.KeyboardType.TypingDelayMs { + t.Errorf("Tools.KeyboardType mismatch: got %+v", loaded.Tools.KeyboardType) + } +} + +func TestSaveComputerUse_CreatesParentDirectory(t *testing.T) { + tempDir := t.TempDir() + path := filepath.Join(tempDir, "deeply", "nested", "computer_use.yaml") + if err := config.SaveComputerUse(path, config.DefaultComputerUseConfig()); err != nil { + t.Fatalf("SaveComputerUse() failed: %v", err) + } + if _, err := os.Stat(path); err != nil { + t.Fatalf("File not created at nested path: %v", err) + } +} diff --git a/config/config.go b/config/config.go index cf94da84..bab67611 100644 --- a/config/config.go +++ b/config/config.go @@ -42,7 +42,7 @@ type Config struct { Init InitConfig `yaml:"-" mapstructure:"-"` Compact CompactConfig `yaml:"compact" mapstructure:"compact"` Web WebConfig `yaml:"web" mapstructure:"web"` - ComputerUse ComputerUseConfig `yaml:"computer_use" mapstructure:"computer_use"` + ComputerUse ComputerUseConfig `yaml:"-" mapstructure:"-"` Channels ChannelsConfig `yaml:"-" mapstructure:"-"` configDir string } @@ -743,6 +743,7 @@ func DefaultConfig() *Config { //nolint:funlen ConfigDirName + "/keybindings.yaml", ConfigDirName + "/prompts.yaml", ConfigDirName + "/channels.yaml", + ConfigDirName + "/computer_use.yaml", ".git/", "*.env", }, @@ -974,57 +975,6 @@ func DefaultConfig() *Config { //nolint:funlen }, Servers: []SSHServerConfig{}, }, - ComputerUse: ComputerUseConfig{ - Enabled: false, - FloatingWindow: 
FloatingWindowConfig{ - Enabled: true, - RespawnOnClose: true, - Position: "top-right", - AlwaysOnTop: true, - }, - Screenshot: ScreenshotToolConfig{ - Enabled: true, - MaxWidth: 1920, - MaxHeight: 1080, - TargetWidth: 1024, - TargetHeight: 768, - Format: "jpeg", - Quality: 85, - StreamingEnabled: true, - CaptureInterval: 3, - BufferSize: 5, - TempDir: "", - LogCaptures: false, - ShowOverlay: true, - }, - RateLimit: RateLimitConfig{ - Enabled: true, - MaxActionsPerMinute: 60, - WindowSeconds: 60, - }, - Tools: ComputerUseToolsConfig{ - MouseMove: MouseMoveToolConfig{ - Enabled: true, - }, - MouseClick: MouseClickToolConfig{ - Enabled: true, - }, - MouseScroll: MouseScrollToolConfig{ - Enabled: true, - }, - KeyboardType: KeyboardTypeToolConfig{ - Enabled: true, - MaxTextLength: 1000, - TypingDelayMs: 100, - }, - GetFocusedApp: GetFocusedAppToolConfig{ - Enabled: true, - }, - ActivateApp: ActivateAppToolConfig{ - Enabled: true, - }, - }, - }, } } diff --git a/internal/agent/tools/keyboard_type_test.go b/internal/agent/tools/keyboard_type_test.go index 45bae86f..854c9bfe 100644 --- a/internal/agent/tools/keyboard_type_test.go +++ b/internal/agent/tools/keyboard_type_test.go @@ -70,10 +70,10 @@ func TestKeyboardTypeTool_TypingDelay(t *testing.T) { } func TestKeyboardTypeTool_ConfigDefault(t *testing.T) { - cfg := config.DefaultConfig() + cfg := config.DefaultComputerUseConfig() expectedDelay := 100 - actualDelay := cfg.ComputerUse.Tools.KeyboardType.TypingDelayMs + actualDelay := cfg.Tools.KeyboardType.TypingDelayMs if actualDelay != expectedDelay { t.Errorf("Expected default typing delay %d ms, got %d ms", expectedDelay, actualDelay)