diff --git a/.github/workflows/build_cpp.yml b/.github/workflows/build_cpp.yml index 5ada0103f..183856c79 100644 --- a/.github/workflows/build_cpp.yml +++ b/.github/workflows/build_cpp.yml @@ -2,8 +2,9 @@ # SPDX-License-Identifier: MIT # This workflow builds and tests the C++ library in cpp/ -# Tests include: CMake build, GoogleTest unit tests -# Platform: Cross-platform (Linux and Windows) +# Tests include: CMake build, GoogleTest mock tests (cloud), integration tests (STX) +# Integration tests: LLM chat/tool-calling, MCP connection, WiFi diagnostics, Health monitoring +# Platform: Cross-platform (Linux and Windows cloud), Windows STX (self-hosted AMD hardware) name: C++ Build & Test @@ -60,7 +61,7 @@ jobs: key: fetchcontent-${{ matrix.os }}-${{ hashFiles('cpp/CMakeLists.txt') }} - name: Configure CMake - run: cmake -B cpp/build -S cpp -DCMAKE_BUILD_TYPE=Release + run: cmake -B cpp/build -S cpp -DCMAKE_BUILD_TYPE=Release -DGAIA_BUILD_INTEGRATION_TESTS=OFF - name: Build run: cmake --build cpp/build --config Release --parallel @@ -102,6 +103,7 @@ jobs: run: | cmake -B cpp/build -S cpp -DCMAKE_BUILD_TYPE=Release \ -DGAIA_BUILD_TESTS=OFF -DGAIA_BUILD_EXAMPLES=OFF \ + -DGAIA_BUILD_INTEGRATION_TESTS=OFF \ -DCMAKE_INSTALL_PREFIX="${{ runner.temp }}/gaia_install" cmake --build cpp/build --config Release --parallel cmake --install cpp/build --config Release @@ -148,12 +150,13 @@ jobs: run: | cmake -B cpp/build-shared -S cpp -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=ON \ - -DGAIA_BUILD_TESTS=OFF -DGAIA_BUILD_EXAMPLES=OFF + -DGAIA_BUILD_TESTS=OFF -DGAIA_BUILD_EXAMPLES=OFF \ + -DGAIA_BUILD_INTEGRATION_TESTS=OFF cmake --build cpp/build-shared --config Release --parallel - # LLM integration tests on STX hardware with Lemonade Server + # Integration tests on STX hardware: LLM + MCP + WiFi + Health integration-test: - name: C++ LLM Integration Test (STX) + name: C++ Integration Tests (STX) runs-on: ${{ (contains(github.event.pull_request.labels.*.name, 'stx-test') && 
'stx-test') || 'stx' }} if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci') @@ -261,29 +264,48 @@ jobs: cmake --build cpp/build-integration --config Release --parallel if ($LASTEXITCODE -ne 0) { throw "CMake build failed" } + - name: Verify uvx is available + shell: powershell + run: | + # uvx is provided by uv (installed by setup-venv) + $uvx = Get-Command uvx -ErrorAction SilentlyContinue + if ($uvx) { + Write-Host "[OK] uvx found at: $($uvx.Source)" + } else { + Write-Host "[WARN] uvx not found -- MCP and Health integration tests will fail" + Write-Host " uvx should be available via uv (installed by setup-venv)" + exit 0 + } + + # Remove any broken persistent installation that may interfere with uvx + $ErrorActionPreference = "SilentlyContinue" + uv tool uninstall windows-mcp 2>$null + $ErrorActionPreference = "Stop" + Write-Host "[OK] uvx ready (windows-mcp will run via temporary uvx environments)" + - name: Start Lemonade Server and run integration tests shell: powershell - timeout-minutes: 15 + timeout-minutes: 20 env: - GAIA_CPP_TEST_MODEL: Qwen3-4B-GGUF + GAIA_CPP_TEST_MODEL: Qwen3-4B-Instruct-2507-GGUF GAIA_CPP_BASE_URL: http://localhost:8000/api/v1 run: | try { # Start Lemonade with Qwen3-4B-GGUF - .\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-GGUF" -Port 8000 -CtxSize 8192 -InitWaitTime 15 + .\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 8000 -CtxSize 16384 -InitWaitTime 15 # Verify health $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10 if ($health.status -ne "ok") { throw "Lemonade health check failed" } - Write-Host "[OK] Lemonade Server ready with Qwen3-4B-GGUF" + Write-Host "[OK] Lemonade Server ready with Qwen3-4B-Instruct-2507-GGUF" - # Run C++ integration tests - Write-Host "=== Running C++ LLM Integration Tests ===" - $env:GAIA_CPP_TEST_MODEL = "Qwen3-4B-GGUF" + # 
Run all C++ integration tests (LLM + MCP + WiFi + Health) + Write-Host "=== Running C++ Integration Tests (LLM + MCP + WiFi + Health) ===" + $env:GAIA_CPP_TEST_MODEL = "Qwen3-4B-Instruct-2507-GGUF" $env:GAIA_CPP_BASE_URL = "http://localhost:8000/api/v1" ctest --test-dir cpp/build-integration -C Release --output-on-failure if ($LASTEXITCODE -ne 0) { throw "C++ integration tests failed" } - Write-Host "[SUCCESS] All C++ LLM integration tests passed!" + Write-Host "[SUCCESS] All C++ integration tests passed!" } catch { Write-Host "[ERROR] $($_.Exception.Message)" @@ -320,7 +342,7 @@ jobs: echo "Build & Test: ${{ needs.build-and-test.result }}" echo "Install Test: ${{ needs.install-test.result }}" echo "Shared Lib Test: ${{ needs.shared-lib-test.result }}" - echo "LLM Integration: ${{ needs.integration-test.result }}" + echo "Integration Tests: ${{ needs.integration-test.result }}" echo "" if [[ "${{ needs.build-and-test.result }}" == "skipped" ]]; then @@ -336,7 +358,7 @@ jobs: # 'skipped' and 'failure' as non-blocking (warn only) since the # runner may lack tools like cmake in PATH. if [[ "${{ needs.integration-test.result }}" == "failure" ]]; then - echo "::warning::LLM integration test failed (STX runner infrastructure issue)" + echo "::warning::Integration tests failed (STX runner infrastructure issue)" fi if [[ "$FAILED" == "0" ]]; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1d57b7ae9..b4b189a08 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -22,7 +22,7 @@ endif() # Integration tests require a running Lemonade server -- always OFF by default. option(GAIA_BUILD_INTEGRATION_TESTS - "Build LLM integration tests (requires lemonade-server with Qwen3-4B-GGUF loaded)" OFF) + "Build LLM integration tests (requires lemonade-server with Qwen3-4B-GGUF loaded)" ON) # HTTPS support for LLM endpoints -- ON by default (required for remote/cloud Lemonade). 
option(GAIA_ENABLE_SSL @@ -85,6 +85,7 @@ endif() add_library(gaia_core src/tool_registry.cpp src/console.cpp + src/clean_console.cpp src/json_utils.cpp src/agent.cpp src/mcp_client.cpp @@ -167,8 +168,8 @@ endif() # Examples # --------------------------------------------------------------------------- if(GAIA_BUILD_EXAMPLES AND WIN32) - add_executable(simple_agent examples/simple_agent.cpp) - target_link_libraries(simple_agent PRIVATE gaia::gaia_core) + add_executable(health_agent examples/health_agent.cpp) + target_link_libraries(health_agent PRIVATE gaia::gaia_core) add_executable(wifi_agent examples/wifi_agent.cpp) target_link_libraries(wifi_agent PRIVATE gaia::gaia_core) @@ -180,22 +181,24 @@ endif() if(GAIA_BUILD_TESTS) enable_testing() - add_executable(gaia_tests + add_executable(tests_mock tests/test_types.cpp tests/test_tool_registry.cpp tests/test_json_utils.cpp tests/test_agent.cpp tests/test_mcp_client.cpp tests/test_console.cpp + tests/test_clean_console.cpp + tests/test_tool_integration.cpp ) - target_link_libraries(gaia_tests PRIVATE + target_link_libraries(tests_mock PRIVATE gaia::gaia_core GTest::gtest_main ) include(GoogleTest) - gtest_discover_tests(gaia_tests) + gtest_discover_tests(tests_mock) endif() # --------------------------------------------------------------------------- @@ -205,18 +208,22 @@ endif() if(GAIA_BUILD_INTEGRATION_TESTS) enable_testing() - add_executable(gaia_integration_tests + add_executable(tests_integration + tests/integration/test_main.cpp tests/integration/test_integration_llm.cpp + tests/integration/test_integration_mcp.cpp + tests/integration/test_integration_wifi.cpp + tests/integration/test_integration_health.cpp ) - target_link_libraries(gaia_integration_tests PRIVATE + target_link_libraries(tests_integration PRIVATE gaia::gaia_core - GTest::gtest_main + GTest::gtest ) include(GoogleTest) - gtest_discover_tests(gaia_integration_tests - PROPERTIES TIMEOUT 120 + gtest_discover_tests(tests_integration + PROPERTIES 
TIMEOUT 300 ) endif() diff --git a/cpp/README.md b/cpp/README.md index 8e51d3edb..a53451a14 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -7,7 +7,7 @@ A C++ port of the [GAIA](https://github.com/amd/gaia) Python base agent framewor Included demos: -- **`simple_agent`** — Windows System Health Agent that connects to the [Windows MCP server](https://github.com/microsoft/windows-mcp), gathers memory/disk/CPU metrics via PowerShell, and pastes a formatted report into Notepad — demonstrating the full computer-use (CUA) flow over the MCP client-server interface. +- **`health_agent`** — Windows System Health Agent that connects to the [Windows MCP server](https://github.com/microsoft/windows-mcp), gathers memory/disk/CPU metrics via PowerShell, and pastes a formatted report into Notepad — demonstrating the full computer-use (CUA) flow over the MCP client-server interface. - **`wifi_agent`** — Wi-Fi Troubleshooter that diagnoses and fixes network connectivity issues using registered PowerShell tools. Demonstrates adaptive reasoning: the agent decides which tools to run based on the query, interprets results, skips irrelevant steps, applies fixes, and verifies fixes worked — all driven by real LLM reasoning with no hard-coded sequences. --- @@ -41,7 +41,7 @@ Default model: `Qwen3-4B-GGUF` (configurable via `AgentConfig::modelId`) ### 3. Windows MCP Server (for the demo) -The `simple_agent` demo launches the Windows MCP server via `uvx`. Install `uv` first: +The `health_agent` demo launches the Windows MCP server via `uvx`. 
Install `uv` first: ```bash pip install uv @@ -64,7 +64,7 @@ cmake --build build --config Release ``` Binaries are placed in `build\Release\`: -- `simple_agent.exe` — System Health Agent (MCP demo) +- `health_agent.exe` — System Health Agent (MCP demo) - `wifi_agent.exe` — Wi-Fi Troubleshooter (registered-tool demo) - `gaia_tests.exe` — unit test suite @@ -83,7 +83,7 @@ cmake --build build ``` Binaries are placed in `build/`: -- `simple_agent` +- `health_agent` - `wifi_agent` - `gaia_tests` @@ -94,7 +94,7 @@ Binaries are placed in `build/`: Make sure the Lemonade server is running, then launch the agent: ```bat -build\Release\simple_agent.exe +build\Release\health_agent.exe ``` The agent will attempt to connect to the Windows MCP server on startup. Once connected, try one of these prompts: @@ -186,15 +186,17 @@ gaia/ # repo root │ ├── tool_registry.h # Tool registration and execution │ ├── mcp_client.h # MCP JSON-RPC client (stdio transport) │ ├── json_utils.h # JSON extraction with multi-strategy fallback - │ └── console.h # TerminalConsole / SilentConsole output handlers + │ ├── console.h # TerminalConsole / SilentConsole output handlers + │ └── clean_console.h # CleanConsole — polished TUI with colors and word-wrap ├── src/ │ ├── agent.cpp # Agent loop state machine │ ├── tool_registry.cpp │ ├── mcp_client.cpp # Cross-platform subprocess + pipes (Win32 / POSIX) │ ├── json_utils.cpp - │ └── console.cpp + │ ├── console.cpp + │ └── clean_console.cpp ├── examples/ - │ ├── simple_agent.cpp # Windows System Health Agent (MCP/CUA demo) + │ ├── health_agent.cpp # Windows System Health Agent (MCP/CUA demo) │ └── wifi_agent.cpp # Wi-Fi Troubleshooter (registered-tool demo) └── tests/ ├── test_agent.cpp @@ -202,6 +204,8 @@ gaia/ # repo root ├── test_json_utils.cpp ├── test_mcp_client.cpp ├── test_console.cpp + ├── test_clean_console.cpp + ├── test_tool_integration.cpp └── test_types.cpp ``` @@ -267,7 +271,9 @@ The agent is pure C++ — PowerShell is just the subprocess that 
runs system com ### Custom TUI (`CleanConsole`) -The `wifi_agent` overrides the default `OutputHandler` with a custom `CleanConsole` that parses the LLM's structured reasoning output: +Both example agents use `gaia::CleanConsole` (from ``) for polished terminal output: ANSI colors, word-wrapping, bordered tool output previews, and a bordered final answer section. + +The base `CleanConsole` parses structured reasoning prefixes from the LLM output: - **`FINDING:`** prefix → green label — what the data shows - **`DECISION:`** prefix → yellow label — what the agent will do next and why diff --git a/cpp/examples/health_agent.cpp b/cpp/examples/health_agent.cpp new file mode 100644 index 000000000..1f9a72f0c --- /dev/null +++ b/cpp/examples/health_agent.cpp @@ -0,0 +1,368 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// System Health Agent — connects to a Windows MCP server and performs +// system health checks with a polished terminal UI. +// +// Ported from Python: examples/mcp_windows_system_health_agent.py +// +// Usage: +// ./health_agent +// > Run a full system health analysis. +// +// Requirements: +// - Windows MCP server: uvx windows-mcp +// - LLM server running at http://localhost:8000/api/v1 + +#include +#include +#include +#include +#include + +#include +#include +#include + +// Alias for convenience — matches wifi_agent pattern +namespace color = gaia::color; + +// --------------------------------------------------------------------------- +// Windows System Health Agent +// --------------------------------------------------------------------------- +/// Connects to the Windows MCP server for PowerShell, GUI automation, etc. 
+class WindowsSystemHealthAgent : public gaia::Agent { +public: + explicit WindowsSystemHealthAgent(const std::string& modelId) + : Agent(makeConfig(modelId)) { + setOutputHandler(std::make_unique()); + init(); // Register tools and compose system prompt + + // Connect to Windows MCP server + std::cout << color::GRAY << " Connecting to Windows MCP server..." + << color::RESET << std::endl; + bool success = connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + + if (success) { + std::cout << color::GREEN << " Connected to Windows MCP server" + << color::RESET << std::endl; + } else { + std::cout << color::RED << color::BOLD + << " [ERROR] " << color::RESET << color::RED + << "Failed to connect to Windows MCP server" + << color::RESET << std::endl; + std::cout << color::GRAY + << " Ensure 'uvx' is installed: pip install uv" + << color::RESET << std::endl; + } + } + + ~WindowsSystemHealthAgent() override { + disconnectAllMcp(); + } + +protected: + std::string getSystemPrompt() const override { + return R"(You are an expert Windows system administrator using the Windows MCP server. + +You are an intelligent agent. Given a user's question, decide which tools are relevant, run them one at a time, reason about each result, adapt your approach based on what you find, and continue until the question is answered. + +IMPORTANT: Be concise. Keep FINDING and DECISION to 1-2 sentences each. No filler words. + +CRITICAL: Do NOT provide a final "answer" until you have finished ALL relevant tool calls. If you still have tools to run, you MUST call the next tool - do NOT stop early with an answer. Only provide an "answer" when your investigation is truly complete. + +## REASONING PROTOCOL + +After EVERY tool result, structure your thought using these exact prefixes: + +FINDING: <1-2 sentences: key facts and values from the output> +DECISION: <1 sentence: what to do next and WHY> + +The user sees FINDING and DECISION highlighted in the UI. 
Use them to make your reasoning visible. + +## AVAILABLE POWERSHELL COMMANDS + +Use mcp_windows_Shell to execute these PowerShell commands: + +Memory: Get-CimInstance Win32_OperatingSystem | Select-Object @{N='TotalGB';E={[math]::Round($_.TotalVisibleMemorySize/1MB,2)}}, @{N='FreeGB';E={[math]::Round($_.FreePhysicalMemory/1MB,2)}} | ConvertTo-Json + +Disk: Get-PSDrive -PSProvider FileSystem | Where-Object {$_.Used -ne $null} | Select-Object Name, @{N='UsedGB';E={[math]::Round($_.Used/1GB,2)}}, @{N='FreeGB';E={[math]::Round($_.Free/1GB,2)}} | ConvertTo-Json + +CPU: Get-CimInstance Win32_Processor | Select-Object Name, LoadPercentage, NumberOfCores | ConvertTo-Json + +GPU: Get-CimInstance Win32_VideoController | Select-Object Name, AdapterRAM, DriverVersion, VideoProcessor | ConvertTo-Json + +Top Processes: Get-Process | Sort-Object CPU -Descending | Select-Object -First 10 Name, @{N='CPU_Sec';E={[math]::Round($_.CPU,1)}}, @{N='MemMB';E={[math]::Round($_.WorkingSet64/1MB,1)}}, Id | ConvertTo-Json + +Network Config: Get-NetIPConfiguration | Select-Object InterfaceAlias, @{N='IPv4';E={($_.IPv4Address).IPAddress}}, @{N='Gateway';E={($_.IPv4DefaultGateway).NextHop}}, @{N='DNS';E={($_.DNSServer).ServerAddresses -join ', '}} | ConvertTo-Json + +Startup Programs: Get-CimInstance Win32_StartupCommand | Select-Object Name, Command, Location | ConvertTo-Json + +Recent System Errors: Get-WinEvent -FilterHashtable @{LogName='System'; Level=2; StartTime=(Get-Date).AddHours(-24)} -MaxEvents 10 -ErrorAction SilentlyContinue | Select-Object TimeCreated, Id, Message | ConvertTo-Json + +Windows Update Status: Get-HotFix | Sort-Object InstalledOn -Descending -ErrorAction SilentlyContinue | Select-Object -First 10 HotFixID, Description, InstalledOn | ConvertTo-Json + +Battery Health: Get-CimInstance Win32_Battery | Select-Object @{N='Status';E={$_.Status}}, @{N='ChargePercent';E={$_.EstimatedChargeRemaining}}, @{N='RunTimeMins';E={$_.EstimatedRunTime}}, 
@{N='Chemistry';E={switch($_.Chemistry){1{'Other'}2{'Unknown'}3{'Lead Acid'}4{'Nickel Cadmium'}5{'Nickel Metal Hydride'}6{'Lithium-ion'}7{'Zinc air'}8{'Lithium Polymer'}default{'N/A'}}}} | ConvertTo-Json + +Installed Software: Get-ItemProperty HKLM:\Software\Microsoft\Windows\CurrentVersion\Uninstall\* | Where-Object {$_.DisplayName -ne $null} | Sort-Object InstallDate -Descending -ErrorAction SilentlyContinue | Select-Object -First 20 DisplayName, DisplayVersion, Publisher, InstallDate | ConvertTo-Json + +Storage Health (SMART): Get-PhysicalDisk | Select-Object FriendlyName, MediaType, @{N='SizeGB';E={[math]::Round($_.Size/1GB,1)}}, HealthStatus, OperationalStatus | ConvertTo-Json + +## HOW TO APPROACH A QUERY + +Your approach should be entirely driven by the query: +- "Quick health check" -> gather 4 core metrics (memory, disk, CPU, GPU) and give a concise text summary in chat. Do NOT open Notepad or write files. +- "Full diagnostics + report" -> gather ALL metrics (memory, disk, CPU, GPU, processes, network, startup, errors, updates, battery, software, storage), write report to temp file, open in Notepad +- "Check memory" -> just run the memory command, report the result, stop +- "Check disk space" -> just run the disk command, report, stop +- "Check CPU info" -> just run the CPU command, report, stop +- "Check GPU info" -> just run the GPU command, report, stop +- "Top processes" -> just run the top processes command, report, stop +- "Network config" -> just run the network command, report, stop +- "Startup programs" -> just run the startup command, report, stop +- "Recent errors" -> just run the event log command, report, stop +- "Windows updates" -> just run the hotfix command, report, stop +- "Battery health" -> just run the battery command, report, stop +- "Installed software" -> just run the software command, report, stop +- "Storage health" -> just run the SMART command, report, stop +- "What LLM models can I run?" 
-> gather RAM, disk, CPU, GPU specs, then recommend models using the reference table below + +For targeted queries (single metric), just run the relevant command and give a direct answer. Do NOT open Notepad or generate a report for simple queries. + +## LLM MODEL RECOMMENDATIONS BY HARDWARE + +Use this reference when recommending models. RAM = system RAM, VRAM = GPU dedicated memory. + +- 8GB RAM, no dedicated GPU: Qwen3-0.6B-GGUF, Phi-4-mini-3.8B-GGUF +- 16GB RAM, any GPU: Qwen3-4B-GGUF, Llama-3.2-3B-GGUF, Phi-4-mini-3.8B-GGUF +- 16GB RAM + AMD NPU (Ryzen AI): Qwen3-4B-FLM (NPU-accelerated via Lemonade) +- 32GB RAM, 8GB+ VRAM: Qwen3-8B-GGUF, Llama-3.1-8B-GGUF, Qwen3-Coder-30B-A3B-GGUF (MoE, only 3B active) +- 64GB+ RAM, 16GB+ VRAM: Llama-3.3-70B-GGUF (Q4), Qwen3-32B-GGUF, DeepSeek-R1-Distill-Qwen-32B-GGUF + +Recommend Lemonade Server (lemonade-server) as the inference backend for AMD hardware. GGUF models run on GPU, FLM models run on NPU. + +## QUICK HEALTH CHECK PROTOCOL (for "quick health check" requests) + +Gather 4 core metrics, then give a concise text summary. Do NOT open Notepad. +1. Get memory info with mcp_windows_Shell +2. Get disk info with mcp_windows_Shell +3. Get CPU info with mcp_windows_Shell +4. Get GPU info with mcp_windows_Shell +5. Provide a concise text summary as your final answer. No file, no Notepad. + +Do NOT give a final answer until ALL 4 tool calls are completed. + +## COMPREHENSIVE DIAGNOSTICS + REPORT PROTOCOL + +For a comprehensive diagnostics report, gather ALL of these in order: +1. Memory info +2. Disk info +3. CPU info +4. GPU info +5. Top 10 processes by CPU +6. Network configuration +7. Startup programs +8. Recent system errors (last 24h) +9. Windows Update history +10. Battery health (if laptop) +11. Installed software (top 20) +12. Storage health (SMART) +13. Build a formatted report and save it to a temp file, then open in Notepad. Use a SINGLE mcp_windows_Shell call. 
Build the report using an array of lines joined with real newlines. Example pattern: + +$lines = @('System Health Report', '', '--- Memory ---', 'Total: X GB, Free: Y GB', '', '--- Disk ---', 'C: X used, Y free', '', '--- CPU ---', 'Name, Cores, Load', '', '--- GPU ---', 'Name, VRAM'); $path = Join-Path $env:TEMP ('SystemHealth_' + (Get-Date -Format 'yyyyMMdd_HHmmss') + '.txt'); $lines -join [Environment]::NewLine | Out-File -FilePath $path -Encoding UTF8; Start-Process notepad $path; $path + +Replace placeholder values with actual data from steps 1-12. IMPORTANT: Use an array of strings joined with [Environment]::NewLine. Do NOT use literal backslash-n characters. + +Do NOT give a final answer until ALL data is gathered and the report is opened in Notepad. + +## FINAL ANSWER + +Only provide an "answer" after ALL tool calls are complete. +IMPORTANT: Use only ASCII characters. Do NOT use em-dashes, en-dashes, or unicode symbols. Use a hyphen (-) or colon (:) instead. +Use ** around key values (RAM amounts, disk sizes, CPU names, percentages, GPU names) to highlight them. +Do NOT use markdown tables. Use bullet points and hyphens only. + +## GOAL TRACKING + +Always set a short `goal` field (3-6 words) describing your current objective.)"; + } + +private: + static gaia::AgentConfig makeConfig(const std::string& modelId) { + gaia::AgentConfig config; + config.maxSteps = 75; + config.modelId = modelId; + config.contextSize = 32768; // 32K needed for "Run ALL diagnostics" (12+ tool calls) + return config; + } +}; + +// --------------------------------------------------------------------------- +// Health-check menu — maps numbered selections to pre-written prompts +// --------------------------------------------------------------------------- +static const std::pair kHealthMenu[] = { + {"Quick health check (console summary)", + "Run a quick health check. Check memory, disk space, CPU, and GPU info. Give a concise text summary. 
Do NOT open Notepad or write any files."}, + {"Check memory usage", + "Check the system memory usage. Report total RAM, free RAM, and usage percentage."}, + {"Check disk space", + "Check disk space on all drives. Report used and free space for each drive."}, + {"Check CPU info", + "Check the CPU information. Report the processor name, number of cores, and current load percentage."}, + {"Check GPU info", + "Check the GPU information. Report the GPU name, VRAM, driver version, and video processor."}, + {"Top processes by CPU usage", + "Show the top 10 processes by CPU usage. Report process name, CPU time, memory usage, and PID."}, + {"Network configuration", + "Check the network configuration. Report interface names, IPv4 addresses, gateways, and DNS servers."}, + {"Startup programs", + "List programs that run at startup. Report name, command, and location (registry key or startup folder)."}, + {"Recent system errors (last 24h)", + "Check the Windows Event Log for system errors in the last 24 hours. Report time, event ID, and message for the 10 most recent errors."}, + {"Windows Update status", + "Check the Windows Update history. Report the 10 most recent hotfixes with ID, description, and install date."}, + {"Battery health", + "Check the battery health status. Report charge percentage, estimated run time, battery chemistry, and overall status."}, + {"Installed software (top 20)", + "List the 20 most recently installed programs. Report name, version, publisher, and install date."}, + {"Storage health (SMART)", + "Check storage device health using SMART data. Report disk name, media type, size, health status, and operational status."}, + {"What LLM models can my system run?", + "Analyze the system specs (RAM, disk, CPU, GPU) and recommend which LLM models this machine can run locally. Consider models like Qwen3, Llama, and Phi."}, + {"Run ALL diagnostics + generate report", + "Run a comprehensive system diagnostic. 
Gather ALL system information: memory, disk, CPU, GPU, top processes, network config, startup programs, recent system errors, Windows Update status, battery health, installed software, and storage health. Then write a formatted report to a temp file and open it in Notepad."}, +}; +static constexpr size_t kMenuSize = sizeof(kHealthMenu) / sizeof(kHealthMenu[0]); + +static void printHealthMenu() { + std::cout << color::CYAN + << " ========================================================================================" + << color::RESET << std::endl; + for (size_t i = 0; i < kMenuSize; ++i) { + size_t num = i + 1; + // Right-align numbers for clean columns (e.g. " [1]" vs "[15]") + if (num < 10) + std::cout << color::YELLOW << " [" << num << "] "; + else + std::cout << color::YELLOW << " [" << num << "] "; + std::cout << color::RESET << color::WHITE + << kHealthMenu[i].first + << color::RESET << std::endl; + } + std::cout << color::CYAN + << " ========================================================================================" + << color::RESET << std::endl; + std::cout << color::GRAY + << " Or type your own question. Type 'quit' to exit." 
+ << color::RESET << std::endl; + std::cout << std::endl; +} + +// --------------------------------------------------------------------------- +// main — model selection + interactive loop with health-check menu +// --------------------------------------------------------------------------- +int main() { + try { + // --- Banner --- + std::cout << std::endl; + std::cout << color::CYAN << color::BOLD + << " ========================================================================================" + << color::RESET << std::endl; + std::cout << color::CYAN << color::BOLD + << " System Health Agent | GAIA C++ Agent Framework | Local Inference" + << color::RESET << std::endl; + std::cout << color::CYAN << color::BOLD + << " ========================================================================================" + << color::RESET << std::endl; + + // --- Model selection --- + std::cout << std::endl; + std::cout << color::BOLD << " Select inference backend:" + << color::RESET << std::endl; + std::cout << color::YELLOW << " [1] " << color::RESET + << color::GREEN << "GPU" << color::RESET + << color::GRAY << " - Qwen3-4B-Instruct-2507-GGUF" + << color::RESET << std::endl; + std::cout << color::YELLOW << " [2] " << color::RESET + << color::MAGENTA << "NPU" << color::RESET + << color::GRAY << " - Qwen3-4B-Instruct-2507-FLM" + << color::RESET << std::endl; + std::cout << std::endl; + std::cout << color::BOLD << " > " << color::RESET << std::flush; + + std::string modelChoice; + if (!std::getline(std::cin, modelChoice)) return 1; + + std::string modelId; + if (modelChoice == "2") { + modelId = "Qwen3-4B-Instruct-2507-FLM"; + std::cout << color::MAGENTA << " Using NPU backend: " + << color::BOLD << modelId << color::RESET << std::endl; + } else { + modelId = "Qwen3-4B-Instruct-2507-GGUF"; + std::cout << color::GREEN << " Using GPU backend: " + << color::BOLD << modelId << color::RESET << std::endl; + } + + WindowsSystemHealthAgent agent(modelId); + + std::cout << std::endl; + 
std::cout << color::GREEN << color::BOLD << " Ready!" + << color::RESET << std::endl; + std::cout << std::endl; + + // --- Interactive loop with health-check menu --- + std::string userInput; + while (true) { + printHealthMenu(); + std::cout << color::BOLD << " > " << color::RESET << std::flush; + if (!std::getline(std::cin, userInput)) break; + + if (userInput.empty()) continue; + if (userInput == "quit" || userInput == "exit" || userInput == "q") break; + + // Map numbered selection to pre-written prompt + std::string query; + bool isNumber = !userInput.empty() && + std::all_of(userInput.begin(), userInput.end(), + [](unsigned char c) { return std::isdigit(c); }); + if (isNumber) { + int choice = 0; + try { choice = std::stoi(userInput); } + catch (const std::out_of_range&) { choice = -1; } + if (choice >= 1 && choice <= static_cast(kMenuSize)) { + size_t idx = static_cast(choice - 1); + query = kHealthMenu[idx].second; + std::cout << color::CYAN << " > " + << kHealthMenu[idx].first + << color::RESET << std::endl; + } else { + std::cout << color::RED << " Invalid selection. Enter 1-" + << kMenuSize << " or type a question." + << color::RESET << std::endl; + continue; + } + } else { + query = userInput; + } + + auto result = agent.processQuery(query); + // Final answer is printed by CleanConsole::printFinalAnswer() + (void)result; + } + + std::cout << std::endl; + std::cout << color::GRAY << " Goodbye!" << color::RESET << std::endl; + + } catch (const std::exception& e) { + std::cerr << color::RED << color::BOLD << "Fatal error: " + << color::RESET << color::RED << e.what() + << color::RESET << std::endl; + return 1; + } + + return 0; +} diff --git a/cpp/examples/simple_agent.cpp b/cpp/examples/simple_agent.cpp deleted file mode 100644 index 165515055..000000000 --- a/cpp/examples/simple_agent.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -// Simple demo agent that connects to a Windows MCP server -// and performs system health checks. -// -// Ported from Python: examples/mcp_windows_system_health_agent.py -// -// Usage: -// ./simple_agent -// > Run a full system health analysis. -// -// Requirements: -// - Windows MCP server: uvx windows-mcp -// - LLM server running at http://localhost:8000/api/v1 - -#include -#include - -#include -#include - -/// Windows System Health Agent. -/// Connects to the Windows MCP server for PowerShell, GUI automation, etc. -class WindowsSystemHealthAgent : public gaia::Agent { -public: - WindowsSystemHealthAgent() : Agent(makeConfig()) { - init(); // Register tools and compose system prompt - - // Connect to Windows MCP server - std::cout << "Connecting to Windows MCP server..." << std::endl; - bool success = connectMcpServer("windows", { - {"command", "uvx"}, - {"args", {"windows-mcp"}} - }); - - if (success) { - std::cout << " Connected to Windows MCP server" << std::endl; - } else { - std::cout << " [ERROR] Failed to connect to Windows MCP server" << std::endl; - std::cout << " Ensure 'uvx' is installed: pip install uv" << std::endl; - } - } - - ~WindowsSystemHealthAgent() override { - disconnectAllMcp(); - } - -protected: - std::string getSystemPrompt() const override { - return R"(You are an expert Windows system administrator using the Windows MCP server. - -CRITICAL: Your task is NOT complete until you have pasted the report into Notepad. 
-DO NOT give a final answer until you have completed ALL of these steps: - -## MANDATORY STEPS (must complete all 6): - -[ ] Step 1: Get memory info with mcp_windows_Shell -[ ] Step 2: Get disk info with mcp_windows_Shell -[ ] Step 3: Get CPU info with mcp_windows_Shell -[ ] Step 4: Copy formatted report to clipboard with mcp_windows_Shell (Set-Clipboard) -[ ] Step 5: Open Notepad with mcp_windows_Shell (Start-Process notepad) -[ ] Step 6: Paste with mcp_windows_Shortcut (ctrl+v) - ---- - -## Step 1-3: Gather Health Metrics - -Use mcp_windows_Shell to execute these PowerShell commands: - -Memory: Get-CimInstance Win32_OperatingSystem | Select-Object @{N='TotalGB';E={[math]::Round($_.TotalVisibleMemorySize/1MB,2)}}, @{N='FreeGB';E={[math]::Round($_.FreePhysicalMemory/1MB,2)}} | ConvertTo-Json - -Disk: Get-PSDrive -PSProvider FileSystem | Where-Object {$_.Used -ne $null} | Select-Object Name, @{N='UsedGB';E={[math]::Round($_.Used/1GB,2)}}, @{N='FreeGB';E={[math]::Round($_.Free/1GB,2)}} | ConvertTo-Json - -CPU: Get-WmiObject Win32_Processor | Select-Object Name, LoadPercentage, NumberOfCores | ConvertTo-Json - -## Step 4: Copy Report to Clipboard - -Create a formatted report and copy to clipboard using Set-Clipboard. - -## Step 5: Open Notepad - -mcp_windows_Shell with command: Start-Process notepad -Then use mcp_windows_Wait with duration: 2 - -## Step 6: Paste the Report - -mcp_windows_Shortcut with shortcut: ctrl+v - -IMPORTANT: Only provide your final answer AFTER you have executed the ctrl+v shortcut.)"; - } - -private: - static gaia::AgentConfig makeConfig() { - gaia::AgentConfig config; - config.maxSteps = 55; - return config; - } -}; - -int main() { - try { - WindowsSystemHealthAgent agent; - - std::cout << "\nWindows System Health Agent ready! Type 'quit' to exit." 
<< std::endl; - std::cout << "Try: 'Run a full system health analysis.'" << std::endl; - std::cout << " or 'How much RAM and disk space do I have?'" << std::endl; - std::cout << " or 'What LLM models can my system run?'\n" << std::endl; - - std::string userInput; - while (true) { - std::cout << "You: " << std::flush; - std::getline(std::cin, userInput); - - if (userInput.empty()) continue; - if (userInput == "quit" || userInput == "exit" || userInput == "q") break; - - auto result = agent.processQuery(userInput); - if (result.contains("result") && !result["result"].get().empty()) { - std::cout << "\nAgent: " << result["result"].get() << "\n" << std::endl; - } - } - - } catch (const std::exception& e) { - std::cerr << "Fatal error: " << e.what() << std::endl; - return 1; - } - - return 0; -} diff --git a/cpp/examples/wifi_agent.cpp b/cpp/examples/wifi_agent.cpp index e665f48c3..4d1e083d1 100644 --- a/cpp/examples/wifi_agent.cpp +++ b/cpp/examples/wifi_agent.cpp @@ -17,7 +17,9 @@ #include #endif +#include #include +#include #include #include #include @@ -28,368 +30,11 @@ #include #include -#include +#include #include -// --------------------------------------------------------------------------- -// ANSI color constants (shared by CleanConsole and TUI helpers) -// --------------------------------------------------------------------------- -namespace color { - constexpr const char* RESET = "\033[0m"; - constexpr const char* BOLD = "\033[1m"; - constexpr const char* DIM = "\033[2m"; - constexpr const char* ITALIC = "\033[3m"; - constexpr const char* UNDERLN = "\033[4m"; - constexpr const char* GRAY = "\033[90m"; - constexpr const char* RED = "\033[91m"; - constexpr const char* GREEN = "\033[92m"; - constexpr const char* YELLOW = "\033[93m"; - constexpr const char* BLUE = "\033[94m"; - constexpr const char* MAGENTA = "\033[95m"; - constexpr const char* CYAN = "\033[96m"; - constexpr const char* WHITE = "\033[97m"; - // Background - constexpr const char* BG_BLUE = 
"\033[44m"; -} - -// --------------------------------------------------------------------------- -// Clean console — nicely formatted progress with tool output summaries -// --------------------------------------------------------------------------- -class CleanConsole : public gaia::OutputHandler { -public: - void printProcessingStart(const std::string& /*query*/, int /*maxSteps*/, - const std::string& /*modelId*/) override { - std::cout << std::endl; - planShown_ = false; - toolsRun_ = 0; - lastGoal_.clear(); - } - - void printStepHeader(int stepNum, int stepLimit) override { - stepNum_ = stepNum; - stepLimit_ = stepLimit; - } - - void printStateInfo(const std::string& /*message*/) override {} - - void printThought(const std::string& thought) override { - if (thought.empty()) return; - - // Look for structured FINDING:/DECISION: reasoning format - auto findingPos = thought.find("FINDING:"); - if (findingPos == std::string::npos) findingPos = thought.find("Finding:"); - auto decisionPos = thought.find("DECISION:"); - if (decisionPos == std::string::npos) decisionPos = thought.find("Decision:"); - - if (findingPos != std::string::npos || decisionPos != std::string::npos) { - // --- Structured reasoning: parse and color-code --- - if (findingPos != std::string::npos) { - size_t start = findingPos + 8; // skip "FINDING:" - size_t end = (decisionPos != std::string::npos) ? 
decisionPos : thought.size(); - std::string text = thought.substr(start, end - start); - // Trim whitespace - size_t f = text.find_first_not_of(" \t\n\r"); - size_t l = text.find_last_not_of(" \t\n\r"); - if (f != std::string::npos) text = text.substr(f, l - f + 1); - - std::cout << color::GREEN << color::BOLD << " Finding: " - << color::RESET; - printWrapped(text, 79, 11); - } - if (decisionPos != std::string::npos) { - size_t start = decisionPos + 9; // skip "DECISION:" - std::string text = thought.substr(start); - size_t f = text.find_first_not_of(" \t\n\r"); - size_t l = text.find_last_not_of(" \t\n\r"); - if (f != std::string::npos) text = text.substr(f, l - f + 1); - - std::cout << color::YELLOW << color::BOLD << " Decision: " - << color::RESET; - printWrapped(text, 78, 12); - } - } else { - // --- Fallback: existing Analysis/Thinking display --- - if (toolsRun_ > 0) { - std::cout << color::BLUE << color::BOLD << " Analysis: " - << color::RESET; - } else { - std::cout << color::MAGENTA << " Thinking: " << color::RESET; - } - printWrapped(thought, 78, 12); - } - } - - void printGoal(const std::string& goal) override { - if (goal.empty() || goal == lastGoal_) return; - lastGoal_ = goal; - std::cout << std::endl; - std::cout << color::CYAN << color::ITALIC - << " Goal: " << color::RESET; - printWrapped(goal, 82, 8); - } - - void printPlan(const gaia::json& plan, int /*currentStep*/) override { - if (planShown_ || !plan.is_array()) return; - planShown_ = true; - std::cout << color::BOLD << color::CYAN << " Plan: " << color::RESET; - for (size_t i = 0; i < plan.size(); ++i) { - if (i > 0) std::cout << color::GRAY << " -> " << color::RESET; - if (plan[i].is_object() && plan[i].contains("tool")) { - std::cout << color::CYAN - << plan[i]["tool"].get() - << color::RESET; - } - } - std::cout << std::endl; - } - - void printToolUsage(const std::string& toolName) override { - lastToolName_ = toolName; - std::cout << std::endl; - std::cout << color::YELLOW << color::BOLD 
- << " [" << stepNum_ << "/" << stepLimit_ << "] " - << toolName << color::RESET << std::endl; - } - - void printToolComplete() override { - ++toolsRun_; - } - - void prettyPrintJson(const gaia::json& data, - const std::string& title) override { - // Show tool arguments (the command being sent) - if (title == "Tool Args" && data.is_object() && !data.empty()) { - std::string argsStr; - bool first = true; - for (auto& [key, val] : data.items()) { - if (!first) argsStr += ", "; - argsStr += key + "="; - if (val.is_string()) argsStr += val.get(); - else argsStr += val.dump(); - first = false; - } - std::cout << color::GRAY << " Args: "; - printWrapped(argsStr, 78, 12); - std::cout << color::RESET; - return; - } - - if (title != "Tool Result" || !data.is_object()) return; - - // Show the command that was executed - if (data.contains("command")) { - std::string cmd = data["command"].get(); - std::cout << color::CYAN << " Cmd: " << color::RESET - << color::GRAY; - printWrapped(cmd, 79, 11); - std::cout << color::RESET; - } - - // Show error if present - if (data.contains("error")) { - std::cout << color::RED << color::BOLD << " Error: " - << color::RESET << color::RED - << data["error"].get() - << color::RESET << std::endl; - return; - } - - // Show tool output preview - if (data.contains("output")) { - std::string output = data["output"].get(); - if (output.empty() || output.find("(no output)") != std::string::npos) { - std::cout << color::GREEN << " Result: " - << color::RESET << color::GRAY << "(no output)" - << color::RESET << std::endl; - return; - } - std::cout << color::GREEN << " Output:" << color::RESET - << std::endl; - printOutputPreview(output); - } - - // Show status for fix tools - if (data.contains("status")) { - auto status = data["status"].get(); - const char* statusColor = (status == "completed") - ? 
color::GREEN : color::YELLOW; - std::cout << statusColor << " Status: " << status - << color::RESET << std::endl; - } - } - - void printError(const std::string& message) override { - std::cout << color::RED << color::BOLD << " ERROR: " << color::RESET - << color::RED; - printWrapped(message, 81, 9); - std::cout << color::RESET; - } - - void printWarning(const std::string& message) override { - std::cout << color::YELLOW << " WARNING: " << color::RESET - << message << std::endl; - } - - void printInfo(const std::string& /*message*/) override {} - - void startProgress(const std::string& /*message*/) override {} - - void stopProgress() override {} - - void printFinalAnswer(const std::string& answer) override { - if (answer.empty()) return; - - // Extract clean text — the LLM sometimes returns raw JSON instead - // of plain text. Try to extract "answer" or "thought" fields. - std::string cleanAnswer = answer; - if (!answer.empty() && answer.front() == '{') { - try { - auto j = gaia::json::parse(answer); - if (j.is_object()) { - if (j.contains("answer") && j["answer"].is_string()) { - cleanAnswer = j["answer"].get(); - } else if (j.contains("thought") && j["thought"].is_string()) { - cleanAnswer = j["thought"].get(); - } - } - } catch (...) 
{ - // Not valid JSON — use as-is - } - } - - std::cout << std::endl; - std::cout << color::GREEN - << " ========================================================================================" - << color::RESET << std::endl; - std::cout << color::GREEN << color::BOLD - << " Conclusion" << color::RESET << std::endl; - std::cout << color::GREEN - << " ========================================================================================" - << color::RESET << std::endl; - // Print each line of the answer word-wrapped - std::string line; - std::istringstream stream(cleanAnswer); - while (std::getline(stream, line)) { - if (line.empty()) { - std::cout << std::endl; - } else { - std::cout << " "; - printWrapped(line, 88, 2); - } - } - std::cout << color::GREEN - << " ========================================================================================" - << color::RESET << std::endl; - } - - void printCompletion(int stepsTaken, int /*stepsLimit*/) override { - std::cout << color::GRAY << " Completed in " << stepsTaken - << " steps" << color::RESET << std::endl; - } - -private: - // Print text with word-wrapping at the given width, indented by indent spaces - // Render **bold** markers as ANSI bold+white, then restore prevColor. 
- static void printStyledWord(const std::string& word, const char* prevColor) { - size_t pos = 0; - while (pos < word.size()) { - auto boldStart = word.find("**", pos); - if (boldStart == std::string::npos) { - std::cout << word.substr(pos); - break; - } - // Print text before ** - std::cout << word.substr(pos, boldStart - pos); - auto boldEnd = word.find("**", boldStart + 2); - if (boldEnd == std::string::npos) { - // Unmatched ** — print literally - std::cout << word.substr(boldStart); - break; - } - // Print bold content - std::cout << color::BOLD << color::WHITE - << word.substr(boldStart + 2, boldEnd - boldStart - 2) - << color::RESET << prevColor; - pos = boldEnd + 2; - } - } - - static void printWrapped(const std::string& text, size_t width, size_t indent, - const char* prevColor = color::RESET) { - std::string indentStr(indent, ' '); - std::istringstream words(text); - std::string word; - size_t col = 0; - bool firstWord = true; - while (words >> word) { - // Strip ** for length calculation - std::string plain = word; - size_t p; - while ((p = plain.find("**")) != std::string::npos) - plain.erase(p, 2); - - if (!firstWord && col + 1 + plain.size() > width) { - std::cout << std::endl << indentStr; - col = 0; - } else if (!firstWord) { - std::cout << ' '; - ++col; - } - printStyledWord(word, prevColor); - col += plain.size(); - firstWord = false; - } - std::cout << color::RESET << std::endl; - } - - // Print a compact preview of command output (up to kMaxPreviewLines lines) - void printOutputPreview(const std::string& output) { - constexpr int kMaxPreviewLines = 10; - std::istringstream stream(output); - std::string line; - int lineCount = 0; - int totalLines = 0; - - // Count total non-empty lines - { - std::istringstream counter(output); - std::string tmp; - while (std::getline(counter, tmp)) { - if (!tmp.empty() && tmp.find_first_not_of(" \t\r\n") != std::string::npos) - ++totalLines; - } - } - - std::cout << color::GRAY << " 
.------------------------------------------------------------------------------------" - << color::RESET << std::endl; - while (std::getline(stream, line) && lineCount < kMaxPreviewLines) { - // Skip empty lines - if (line.empty() || line.find_first_not_of(" \t\r\n") == std::string::npos) - continue; - // Trim trailing \r - if (!line.empty() && line.back() == '\r') line.pop_back(); - // Truncate long lines - if (line.size() > 82) line = line.substr(0, 79) + "..."; - std::cout << color::GRAY << " | " << line << color::RESET - << std::endl; - ++lineCount; - } - if (totalLines > kMaxPreviewLines) { - std::cout << color::GRAY << " | ... (" - << (totalLines - kMaxPreviewLines) - << " more lines)" << color::RESET << std::endl; - } - std::cout << color::GRAY << " '------------------------------------------------------------------------------------" - << color::RESET << std::endl; - } - - int stepNum_ = 0; - int stepLimit_ = 0; - int toolsRun_ = 0; - bool planShown_ = false; - std::string lastToolName_; - std::string lastGoal_; -}; +// Alias for convenience +namespace color = gaia::color; // --------------------------------------------------------------------------- // Shell helper — runs a command and captures stdout+stderr @@ -457,7 +102,7 @@ class WiFiTroubleshooterAgent : public gaia::Agent { public: explicit WiFiTroubleshooterAgent(const std::string& modelId) : Agent(makeConfig(modelId)) { - setOutputHandler(std::make_unique()); + setOutputHandler(std::make_unique()); init(); } @@ -679,12 +324,15 @@ for ($i = 0; $i -lt $nStreams; $i++) { // Write to temp file and execute directly (not via runShell which // would double-wrap in PowerShell). std::string tempPath; -#ifdef _WIN32 +#ifdef _MSC_VER char* tmp = nullptr; size_t len = 0; _dupenv_s(&tmp, &len, "TEMP"); tempPath = (tmp ? std::string(tmp) : "C:\\Temp") + "\\gaia_speedtest.ps1"; free(tmp); +#elif defined(_WIN32) + const char* tmp = std::getenv("TEMP"); + tempPath = (tmp ? 
std::string(tmp) : "C:\\Temp") + "\\gaia_speedtest.ps1"; #else tempPath = "/tmp/gaia_speedtest.ps1"; #endif @@ -907,12 +555,15 @@ for ($i = 0; $i -lt $nStreams; $i++) { // Write to temp file and execute std::string tempPath; -#ifdef _WIN32 +#ifdef _MSC_VER char* tmp = nullptr; size_t len = 0; _dupenv_s(&tmp, &len, "TEMP"); tempPath = (tmp ? std::string(tmp) : "C:\\Temp") + "\\gaia_radio.ps1"; free(tmp); +#elif defined(_WIN32) + const char* tmp = std::getenv("TEMP"); + tempPath = (tmp ? std::string(tmp) : "C:\\Temp") + "\\gaia_radio.ps1"; #else tempPath = "/tmp/gaia_radio.ps1"; #endif @@ -1074,7 +725,7 @@ int main() { std::cout << color::BOLD << " > " << color::RESET << std::flush; std::string modelChoice; - std::getline(std::cin, modelChoice); + if (!std::getline(std::cin, modelChoice)) return 1; std::string modelId; if (modelChoice == "2") { @@ -1099,19 +750,32 @@ int main() { while (true) { printDiagnosticMenu(); std::cout << color::BOLD << " > " << color::RESET << std::flush; - std::getline(std::cin, userInput); + if (!std::getline(std::cin, userInput)) break; if (userInput.empty()) continue; if (userInput == "quit" || userInput == "exit" || userInput == "q") break; // Map numbered selection to pre-written prompt std::string query; - if (userInput.size() == 1 && userInput[0] >= '1' && userInput[0] <= '0' + static_cast(kMenuSize)) { - size_t idx = static_cast(userInput[0] - '1'); - query = kDiagnosticMenu[idx].second; - std::cout << color::CYAN << " > " - << kDiagnosticMenu[idx].first - << color::RESET << std::endl; + bool isNumber = !userInput.empty() && + std::all_of(userInput.begin(), userInput.end(), + [](unsigned char c) { return std::isdigit(c); }); + if (isNumber) { + int choice = 0; + try { choice = std::stoi(userInput); } + catch (const std::out_of_range&) { choice = -1; } + if (choice >= 1 && choice <= static_cast(kMenuSize)) { + size_t idx = static_cast(choice - 1); + query = kDiagnosticMenu[idx].second; + std::cout << color::CYAN << " > " + << 
kDiagnosticMenu[idx].first + << color::RESET << std::endl; + } else { + std::cout << color::RED << " Invalid selection. Enter 1-" + << kMenuSize << " or type a question." + << color::RESET << std::endl; + continue; + } } else { query = userInput; } diff --git a/cpp/include/gaia/clean_console.h b/cpp/include/gaia/clean_console.h new file mode 100644 index 000000000..3a81296d3 --- /dev/null +++ b/cpp/include/gaia/clean_console.h @@ -0,0 +1,88 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Clean console output handler for agent examples. +// Provides polished terminal UI with ANSI colors, word-wrapping, tool output +// previews, and bordered final-answer sections. +// +// Usage: +// agent.setOutputHandler(std::make_unique()); +// +// Extend for domain-specific formatting: +// class MyConsole : public gaia::CleanConsole { +// void printThought(const std::string& t) override { ... } +// }; + +#pragma once + +#include + +#include "gaia/console.h" +#include "gaia/export.h" + +namespace gaia { + +// --------------------------------------------------------------------------- +// ANSI color constants — shared by CleanConsole and TUI helpers +// --------------------------------------------------------------------------- +namespace color { + constexpr const char* RESET = "\033[0m"; + constexpr const char* BOLD = "\033[1m"; + constexpr const char* DIM = "\033[2m"; + constexpr const char* ITALIC = "\033[3m"; + constexpr const char* UNDERLN = "\033[4m"; + constexpr const char* GRAY = "\033[90m"; + constexpr const char* RED = "\033[91m"; + constexpr const char* GREEN = "\033[92m"; + constexpr const char* YELLOW = "\033[93m"; + constexpr const char* BLUE = "\033[94m"; + constexpr const char* MAGENTA = "\033[95m"; + constexpr const char* CYAN = "\033[96m"; + constexpr const char* WHITE = "\033[97m"; + // Background + constexpr const char* BG_BLUE = "\033[44m"; +} // namespace color + +// 
--------------------------------------------------------------------------- +// CleanConsole — nicely formatted progress with tool output summaries +// --------------------------------------------------------------------------- +class GAIA_API CleanConsole : public OutputHandler { +public: + void printProcessingStart(const std::string& query, int maxSteps, + const std::string& modelId) override; + void printStepHeader(int stepNum, int stepLimit) override; + void printStateInfo(const std::string& message) override; + void printThought(const std::string& thought) override; + void printGoal(const std::string& goal) override; + void printPlan(const json& plan, int currentStep) override; + void printToolUsage(const std::string& toolName) override; + void printToolComplete() override; + void prettyPrintJson(const json& data, const std::string& title) override; + void printError(const std::string& message) override; + void printWarning(const std::string& message) override; + void printInfo(const std::string& message) override; + void startProgress(const std::string& message) override; + void stopProgress() override; + void printFinalAnswer(const std::string& answer) override; + void printCompletion(int stepsTaken, int stepsLimit) override; + +protected: + /// Render **bold** markers as ANSI bold+white, then restore prevColor. + static void printStyledWord(const std::string& word, const char* prevColor); + + /// Print text with word-wrapping at the given width, indented by indent spaces. + static void printWrapped(const std::string& text, size_t width, size_t indent, + const char* prevColor = color::RESET); + + /// Print a compact preview of command output (up to 10 lines). 
+ void printOutputPreview(const std::string& output); + + int stepNum_ = 0; + int stepLimit_ = 0; + int toolsRun_ = 0; + bool planShown_ = false; + std::string lastToolName_; + std::string lastGoal_; +}; + +} // namespace gaia diff --git a/cpp/include/gaia/types.h b/cpp/include/gaia/types.h index 2fc62e7d1..721a1cb65 100644 --- a/cpp/include/gaia/types.h +++ b/cpp/include/gaia/types.h @@ -143,8 +143,17 @@ struct ParsedResponse { /// Return the default LLM base URL, honoring the LEMONADE_BASE_URL /// environment variable if set (matching the Python CLI behavior). inline std::string defaultBaseUrl() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "LEMONADE_BASE_URL"); + std::string result = env ? std::string(env) : "http://localhost:8000/api/v1"; + free(env); + return result; +#else const char* env = std::getenv("LEMONADE_BASE_URL"); // NOLINT(concurrency-mt-unsafe) return env ? std::string(env) : "http://localhost:8000/api/v1"; +#endif } struct AgentConfig { diff --git a/cpp/src/agent.cpp b/cpp/src/agent.cpp index 53ecff569..6910b1c95 100644 --- a/cpp/src/agent.cpp +++ b/cpp/src/agent.cpp @@ -503,16 +503,17 @@ json Agent::processQuery(const std::string& userInput, int maxSteps) { std::string toolName = parsed.toolName.value(); json toolArgs = parsed.toolArgs.value_or(json::object()); - // Loop detection - if (toolCallHistory.size() >= 4) { + // Loop detection — same tool name AND same args repeated 4+ times + if (toolCallHistory.size() >= 3) { bool allSame = true; for (size_t i = toolCallHistory.size() - 3; i < toolCallHistory.size(); ++i) { - if (toolCallHistory[i].first != toolName) { + if (toolCallHistory[i].first != toolName || + toolCallHistory[i].second != toolArgs) { allSame = false; break; } } - if (allSame && toolCallHistory.back().first == toolName) { + if (allSame) { console_->printWarning("Detected repeated tool call loop. 
Breaking out."); finalAnswer = "Task stopped due to repeated tool call loop."; break; @@ -537,9 +538,9 @@ json Agent::processQuery(const std::string& userInput, int maxSteps) { toolMsg.role = MessageRole::TOOL; toolMsg.name = toolName; std::string resultStr = toolResult.dump(); - if (resultStr.size() > 20000) { - resultStr = resultStr.substr(0, 10000) + "\n...[truncated]...\n" + - resultStr.substr(resultStr.size() - 5000); + if (resultStr.size() > 4000) { + resultStr = resultStr.substr(0, 2000) + "\n...[truncated]...\n" + + resultStr.substr(resultStr.size() - 1500); } toolMsg.content = resultStr; messages.push_back(toolMsg); diff --git a/cpp/src/clean_console.cpp b/cpp/src/clean_console.cpp new file mode 100644 index 000000000..4ab724993 --- /dev/null +++ b/cpp/src/clean_console.cpp @@ -0,0 +1,335 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "gaia/clean_console.h" + +#include +#include + +namespace gaia { + +void CleanConsole::printProcessingStart(const std::string& /*query*/, int /*maxSteps*/, + const std::string& /*modelId*/) { + std::cout << std::endl; + planShown_ = false; + toolsRun_ = 0; + lastGoal_.clear(); +} + +void CleanConsole::printStepHeader(int stepNum, int stepLimit) { + stepNum_ = stepNum; + stepLimit_ = stepLimit; +} + +void CleanConsole::printStateInfo(const std::string& /*message*/) {} + +void CleanConsole::printThought(const std::string& thought) { + if (thought.empty()) return; + + // Look for structured FINDING:/DECISION: reasoning format + auto findingPos = thought.find("FINDING:"); + if (findingPos == std::string::npos) findingPos = thought.find("Finding:"); + auto decisionPos = thought.find("DECISION:"); + if (decisionPos == std::string::npos) decisionPos = thought.find("Decision:"); + + if (findingPos != std::string::npos || decisionPos != std::string::npos) { + // Structured reasoning: parse and color-code + if (findingPos != std::string::npos) { + size_t 
start = findingPos + 8; // skip "FINDING:" + size_t end = (decisionPos != std::string::npos) ? decisionPos : thought.size(); + std::string text = thought.substr(start, end - start); + size_t f = text.find_first_not_of(" \t\n\r"); + size_t l = text.find_last_not_of(" \t\n\r"); + if (f != std::string::npos) text = text.substr(f, l - f + 1); + + std::cout << color::GREEN << color::BOLD << " Finding: " + << color::RESET; + printWrapped(text, 79, 11); + } + if (decisionPos != std::string::npos) { + size_t start = decisionPos + 9; // skip "DECISION:" + std::string text = thought.substr(start); + size_t f = text.find_first_not_of(" \t\n\r"); + size_t l = text.find_last_not_of(" \t\n\r"); + if (f != std::string::npos) text = text.substr(f, l - f + 1); + + std::cout << color::YELLOW << color::BOLD << " Decision: " + << color::RESET; + printWrapped(text, 78, 12); + } + } else { + // Fallback: Analysis/Thinking display + if (toolsRun_ > 0) { + std::cout << color::BLUE << color::BOLD << " Analysis: " + << color::RESET; + } else { + std::cout << color::MAGENTA << " Thinking: " << color::RESET; + } + printWrapped(thought, 78, 12); + } +} + +void CleanConsole::printGoal(const std::string& goal) { + if (goal.empty() || goal == lastGoal_) return; + lastGoal_ = goal; + std::cout << std::endl; + std::cout << color::CYAN << color::ITALIC + << " Goal: " << color::RESET; + printWrapped(goal, 82, 8); +} + +void CleanConsole::printPlan(const json& plan, int /*currentStep*/) { + if (planShown_ || !plan.is_array()) return; + planShown_ = true; + std::cout << color::BOLD << color::CYAN << " Plan: " << color::RESET; + for (size_t i = 0; i < plan.size(); ++i) { + if (i > 0) std::cout << color::GRAY << " -> " << color::RESET; + if (plan[i].is_object() && plan[i].contains("tool")) { + std::cout << color::CYAN + << plan[i]["tool"].get() + << color::RESET; + } + } + std::cout << std::endl; +} + +void CleanConsole::printToolUsage(const std::string& toolName) { + lastToolName_ = toolName; + 
std::cout << std::endl; + std::cout << color::YELLOW << color::BOLD + << " [" << stepNum_ << "/" << stepLimit_ << "] " + << toolName << color::RESET << std::endl; +} + +void CleanConsole::printToolComplete() { + ++toolsRun_; +} + +void CleanConsole::prettyPrintJson(const json& data, + const std::string& title) { + // Show tool arguments (the command being sent) + if (title == "Tool Args" && data.is_object() && !data.empty()) { + std::string argsStr; + bool first = true; + for (auto& [key, val] : data.items()) { + if (!first) argsStr += ", "; + argsStr += key + "="; + if (val.is_string()) argsStr += val.get(); + else argsStr += val.dump(); + first = false; + } + std::cout << color::GRAY << " Args: "; + printWrapped(argsStr, 78, 12); + std::cout << color::RESET; + return; + } + + if (title != "Tool Result" || !data.is_object()) return; + + // Show the command that was executed + if (data.contains("command")) { + std::string cmd = data["command"].get(); + std::cout << color::CYAN << " Cmd: " << color::RESET + << color::GRAY; + printWrapped(cmd, 79, 11); + std::cout << color::RESET; + } + + // Show error if present + if (data.contains("error")) { + std::cout << color::RED << color::BOLD << " Error: " + << color::RESET << color::RED + << data["error"].get() + << color::RESET << std::endl; + return; + } + + // Show tool output preview + if (data.contains("output")) { + std::string output = data["output"].get(); + if (output.empty() || output.find("(no output)") != std::string::npos) { + std::cout << color::GREEN << " Result: " + << color::RESET << color::GRAY << "(no output)" + << color::RESET << std::endl; + return; + } + std::cout << color::GREEN << " Output:" << color::RESET + << std::endl; + printOutputPreview(output); + } + + // Show status + if (data.contains("status")) { + auto status = data["status"].get(); + const char* statusColor = (status == "completed") + ? 
color::GREEN : color::YELLOW; + std::cout << statusColor << " Status: " << status + << color::RESET << std::endl; + } +} + +void CleanConsole::printError(const std::string& message) { + std::cout << color::RED << color::BOLD << " ERROR: " << color::RESET + << color::RED; + printWrapped(message, 81, 9); + std::cout << color::RESET; +} + +void CleanConsole::printWarning(const std::string& message) { + std::cout << color::YELLOW << " WARNING: " << color::RESET + << message << std::endl; +} + +void CleanConsole::printInfo(const std::string& /*message*/) {} + +void CleanConsole::startProgress(const std::string& /*message*/) {} + +void CleanConsole::stopProgress() {} + +void CleanConsole::printFinalAnswer(const std::string& answer) { + if (answer.empty()) return; + + // Extract clean text — the LLM sometimes returns raw JSON + std::string cleanAnswer = answer; + if (!answer.empty() && answer.front() == '{') { + try { + auto j = json::parse(answer); + if (j.is_object()) { + if (j.contains("answer") && j["answer"].is_string()) { + cleanAnswer = j["answer"].get(); + } else if (j.contains("thought") && j["thought"].is_string()) { + cleanAnswer = j["thought"].get(); + } + } + } catch (...) 
{ + // Not valid JSON — use as-is + } + } + + std::cout << std::endl; + std::cout << color::GREEN + << " ========================================================================================" + << color::RESET << std::endl; + std::cout << color::GREEN << color::BOLD + << " Conclusion" << color::RESET << std::endl; + std::cout << color::GREEN + << " ========================================================================================" + << color::RESET << std::endl; + // Print each line of the answer word-wrapped + std::string line; + std::istringstream stream(cleanAnswer); + while (std::getline(stream, line)) { + if (line.empty()) { + std::cout << std::endl; + } else { + std::cout << " "; + printWrapped(line, 88, 2); + } + } + std::cout << color::GREEN + << " ========================================================================================" + << color::RESET << std::endl; +} + +void CleanConsole::printCompletion(int stepsTaken, int /*stepsLimit*/) { + std::cout << color::GRAY << " Completed in " << stepsTaken + << " steps" << color::RESET << std::endl; +} + +// --------------------------------------------------------------------------- +// Protected helpers +// --------------------------------------------------------------------------- + +void CleanConsole::printStyledWord(const std::string& word, const char* prevColor) { + size_t pos = 0; + while (pos < word.size()) { + auto boldStart = word.find("**", pos); + if (boldStart == std::string::npos) { + std::cout << word.substr(pos); + break; + } + // Print text before ** + std::cout << word.substr(pos, boldStart - pos); + auto boldEnd = word.find("**", boldStart + 2); + if (boldEnd == std::string::npos) { + // Unmatched ** — print literally + std::cout << word.substr(boldStart); + break; + } + // Print bold content + std::cout << color::BOLD << color::WHITE + << word.substr(boldStart + 2, boldEnd - boldStart - 2) + << color::RESET << prevColor; + pos = boldEnd + 2; + } +} + +void 
CleanConsole::printWrapped(const std::string& text, size_t width, size_t indent, + const char* prevColor) { + std::string indentStr(indent, ' '); + std::istringstream words(text); + std::string word; + size_t col = 0; + bool firstWord = true; + while (words >> word) { + // Strip ** for length calculation + std::string plain = word; + size_t p; + while ((p = plain.find("**")) != std::string::npos) + plain.erase(p, 2); + + if (!firstWord && col + 1 + plain.size() > width) { + std::cout << std::endl << indentStr; + col = 0; + } else if (!firstWord) { + std::cout << ' '; + ++col; + } + printStyledWord(word, prevColor); + col += plain.size(); + firstWord = false; + } + std::cout << color::RESET << std::endl; +} + +void CleanConsole::printOutputPreview(const std::string& output) { + constexpr int kMaxPreviewLines = 10; + std::istringstream stream(output); + std::string line; + int lineCount = 0; + int totalLines = 0; + + // Count total non-empty lines + { + std::istringstream counter(output); + std::string tmp; + while (std::getline(counter, tmp)) { + if (!tmp.empty() && tmp.find_first_not_of(" \t\r\n") != std::string::npos) + ++totalLines; + } + } + + std::cout << color::GRAY << " .------------------------------------------------------------------------------------" + << color::RESET << std::endl; + while (std::getline(stream, line) && lineCount < kMaxPreviewLines) { + // Skip empty lines + if (line.empty() || line.find_first_not_of(" \t\r\n") == std::string::npos) + continue; + // Trim trailing \r + if (!line.empty() && line.back() == '\r') line.pop_back(); + // Truncate long lines + if (line.size() > 82) line = line.substr(0, 79) + "..."; + std::cout << color::GRAY << " | " << line << color::RESET + << std::endl; + ++lineCount; + } + if (totalLines > kMaxPreviewLines) { + std::cout << color::GRAY << " | ... 
(" + << (totalLines - kMaxPreviewLines) + << " more lines)" << color::RESET << std::endl; + } + std::cout << color::GRAY << " '------------------------------------------------------------------------------------" + << color::RESET << std::endl; +} + +} // namespace gaia diff --git a/cpp/tests/integration/test_integration_health.cpp b/cpp/tests/integration/test_integration_health.cpp new file mode 100644 index 000000000..72c7fc32e --- /dev/null +++ b/cpp/tests/integration/test_integration_health.cpp @@ -0,0 +1,193 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Integration tests for System Health Agent use-cases. +// Tests the full stack: LLM reasoning + MCP transport + windows-mcp Shell tool. +// Mirrors the real health_agent architecture: connectMcpServer("windows", ...) +// auto-discovers mcp_windows_Shell and the LLM calls it to run PowerShell. +// +// Requires: +// - Windows (PowerShell + windows-mcp) +// - uvx installed (pip install uv) +// - lemonade-server running with the test model loaded +// +// Env vars: +// GAIA_CPP_TEST_MODEL — model ID (default: Qwen3-4B-Instruct-2507-GGUF) +// GAIA_CPP_BASE_URL — LLM endpoint (default: http://localhost:8000/api/v1) + +#include +#include +#include + +#include +#include +#include +#include + +#ifndef _WIN32 +TEST(IntegrationHealth, SkipNonWindows) { + GTEST_SKIP() << "Health integration tests require Windows"; +} +#else + +// --------------------------------------------------------------------------- +// Env var helpers +// --------------------------------------------------------------------------- + +static std::string testModel() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_TEST_MODEL"); + std::string result = env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_TEST_MODEL"); + return env ? 
std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; +#endif +} + +static std::string testBaseUrl() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_BASE_URL"); + std::string result = env ? std::string(env) : "http://localhost:8000/api/v1"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_BASE_URL"); + return env ? std::string(env) : "http://localhost:8000/api/v1"; +#endif +} + +static std::string toLower(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::tolower(c); }); + return s; +} + +static gaia::AgentConfig healthTestConfig(int maxSteps = 3) { + gaia::AgentConfig cfg; + cfg.baseUrl = testBaseUrl(); + cfg.modelId = testModel(); + cfg.maxSteps = maxSteps; + cfg.silentMode = true; + return cfg; +} + +// --------------------------------------------------------------------------- +// Health Test Agent — uses MCP (windows-mcp) just like the real health_agent +// connectMcpServer auto-discovers mcp_windows_Shell, mcp_windows_Wait, etc. +// The LLM calls mcp_windows_Shell with PowerShell commands. +// --------------------------------------------------------------------------- + +class HealthTestAgent : public gaia::Agent { +public: + explicit HealthTestAgent(int maxSteps = 3) + : Agent(healthTestConfig(maxSteps)) { + init(); + + // Connect to Windows MCP server — same as health_agent.cpp + bool ok = connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + if (!ok) { + throw std::runtime_error("Failed to connect to Windows MCP server (uvx windows-mcp)"); + } + } + + ~HealthTestAgent() override { + disconnectAllMcp(); + } + +protected: + std::string getSystemPrompt() const override { + return R"(You are a system health diagnostic assistant using the Windows MCP server. +Use the mcp_windows_Shell tool to execute PowerShell commands and gather system information. 
+IMPORTANT: You MUST call mcp_windows_Shell to get real data. Do not guess or make up system information. + +Available PowerShell commands: +- Memory: Get-CimInstance Win32_OperatingSystem | Select-Object @{N='TotalGB';E={[math]::Round($_.TotalVisibleMemorySize/1MB,2)}}, @{N='FreeGB';E={[math]::Round($_.FreePhysicalMemory/1MB,2)}} | ConvertTo-Json +- CPU: Get-CimInstance Win32_Processor | Select-Object Name, LoadPercentage, NumberOfCores | ConvertTo-Json +- Disk: Get-PSDrive -PSProvider FileSystem | Where-Object {$_.Used -ne $null} | Select-Object Name, @{N='UsedGB';E={[math]::Round($_.Used/1GB,2)}}, @{N='FreeGB';E={[math]::Round($_.Free/1GB,2)}} | ConvertTo-Json +- GPU: Get-CimInstance Win32_VideoController | Select-Object Name, AdapterRAM, DriverVersion, VideoProcessor | ConvertTo-Json + +When asked to check something, call mcp_windows_Shell with the appropriate command, then summarize the result concisely.)"; + } +}; + +// --------------------------------------------------------------------------- +// Test fixture — skips all tests if MCP connection fails +// --------------------------------------------------------------------------- + +class IntegrationHealthTest : public ::testing::Test { +protected: + std::unique_ptr agent; + + void SetUp() override { + agent = std::make_unique(); + } + + void TearDown() override { + agent.reset(); + } +}; + +// --------------------------------------------------------------------------- +// Tests — LLM + MCP + real PowerShell +// --------------------------------------------------------------------------- + +TEST_F(IntegrationHealthTest, CheckMemory) { + auto result = agent->processQuery( + "Check the system memory using mcp_windows_Shell. 
" + "How much total RAM do I have and how much is free?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + std::string lower = toLower(answer); + bool hasMemInfo = lower.find("gb") != std::string::npos + || lower.find("memory") != std::string::npos + || lower.find("ram") != std::string::npos; + EXPECT_TRUE(hasMemInfo) << "Expected memory info in answer, got: " << answer; +} + +TEST_F(IntegrationHealthTest, CheckCpu) { + auto result = agent->processQuery( + "Check the CPU information using mcp_windows_Shell. " + "What processor do I have and how many cores?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + std::string lower = toLower(answer); + bool hasCpuInfo = lower.find("core") != std::string::npos + || lower.find("processor") != std::string::npos + || lower.find("cpu") != std::string::npos + || lower.find("amd") != std::string::npos + || lower.find("intel") != std::string::npos + || lower.find("ryzen") != std::string::npos; + EXPECT_TRUE(hasCpuInfo) << "Expected CPU info in answer, got: " << answer; +} + +TEST_F(IntegrationHealthTest, CheckDisk) { + auto result = agent->processQuery( + "Check the disk space using mcp_windows_Shell. 
" + "How much free space is on each drive?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + std::string lower = toLower(answer); + bool hasDiskInfo = lower.find("gb") != std::string::npos + || lower.find("drive") != std::string::npos + || lower.find("disk") != std::string::npos + || lower.find("c:") != std::string::npos + || lower.find("free") != std::string::npos; + EXPECT_TRUE(hasDiskInfo) << "Expected disk info in answer, got: " << answer; +} + +#endif // _WIN32 diff --git a/cpp/tests/integration/test_integration_llm.cpp b/cpp/tests/integration/test_integration_llm.cpp index 0ce0be0fc..748df0678 100644 --- a/cpp/tests/integration/test_integration_llm.cpp +++ b/cpp/tests/integration/test_integration_llm.cpp @@ -26,13 +26,31 @@ // --------------------------------------------------------------------------- static std::string testModel() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_TEST_MODEL"); + std::string result = env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; + free(env); + return result; +#else const char* env = std::getenv("GAIA_CPP_TEST_MODEL"); return env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; +#endif } static std::string testBaseUrl() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_BASE_URL"); + std::string result = env ? std::string(env) : "http://localhost:8000/api/v1"; + free(env); + return result; +#else const char* env = std::getenv("GAIA_CPP_BASE_URL"); return env ? 
std::string(env) : "http://localhost:8000/api/v1"; +#endif } static std::string toLower(std::string s) { diff --git a/cpp/tests/integration/test_integration_mcp.cpp b/cpp/tests/integration/test_integration_mcp.cpp new file mode 100644 index 000000000..d5385ad27 --- /dev/null +++ b/cpp/tests/integration/test_integration_mcp.cpp @@ -0,0 +1,160 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Integration tests for MCP (Model Context Protocol) connectivity. +// Tests real MCP server connection, tool discovery, and tool execution +// via the windows-mcp server (uvx windows-mcp). +// +// Requires: +// - Windows (windows-mcp is a Windows MCP server) +// - uvx installed (pip install uv) +// +// Env vars: +// GAIA_CPP_TEST_MODEL — model ID (default: Qwen3-4B-Instruct-2507-GGUF) +// GAIA_CPP_BASE_URL — LLM endpoint (default: http://localhost:8000/api/v1) + +#include +#include +#include + +#include +#include + +#ifndef _WIN32 +TEST(IntegrationMCP, SkipNonWindows) { + GTEST_SKIP() << "MCP integration tests require Windows (windows-mcp)"; +} +#else + +// --------------------------------------------------------------------------- +// Env var helpers +// --------------------------------------------------------------------------- + +static std::string testModel() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_TEST_MODEL"); + std::string result = env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_TEST_MODEL"); + return env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; +#endif +} + +static std::string testBaseUrl() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_BASE_URL"); + std::string result = env ? 
std::string(env) : "http://localhost:8000/api/v1"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_BASE_URL"); + return env ? std::string(env) : "http://localhost:8000/api/v1"; +#endif +} + +// --------------------------------------------------------------------------- +// Minimal agent for MCP testing — no custom tools, just MCP connection +// --------------------------------------------------------------------------- + +class McpTestAgent : public gaia::Agent { +public: + explicit McpTestAgent() : Agent(makeConfig()) { init(); } + ~McpTestAgent() override { disconnectAllMcp(); } + +protected: + std::string getSystemPrompt() const override { return "Test agent for MCP."; } + +private: + static gaia::AgentConfig makeConfig() { + gaia::AgentConfig cfg; + cfg.baseUrl = testBaseUrl(); + cfg.modelId = testModel(); + cfg.maxSteps = 1; + cfg.silentMode = true; + return cfg; + } +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +TEST(IntegrationMCP, ConnectsToWindowsMcp) { + McpTestAgent agent; + bool connected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(connected) << "Failed to connect to Windows MCP server (uvx windows-mcp)"; +} + +TEST(IntegrationMCP, DiscoversShellTool) { + McpTestAgent agent; + bool connected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(connected); + + // windows-mcp should expose at least Shell + EXPECT_TRUE(agent.tools().hasTool("mcp_windows_Shell")) + << "Expected mcp_windows_Shell tool to be discovered"; +} + +TEST(IntegrationMCP, DiscoversMultipleTools) { + McpTestAgent agent; + bool connected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(connected); + + // windows-mcp typically exposes Shell, Wait, 
Shortcut, etc. + size_t toolCount = agent.tools().size(); + EXPECT_GE(toolCount, 2) + << "Expected at least 2 MCP tools, got: " << toolCount; +} + +TEST(IntegrationMCP, DisconnectAndReconnect) { + McpTestAgent agent; + bool connected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(connected); + ASSERT_TRUE(agent.tools().hasTool("mcp_windows_Shell")); + + // Disconnect + agent.disconnectMcpServer("windows"); + + // Reconnect + bool reconnected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(reconnected) << "Failed to reconnect to Windows MCP server"; + EXPECT_TRUE(agent.tools().hasTool("mcp_windows_Shell")) + << "Expected mcp_windows_Shell after reconnect"; +} + +TEST(IntegrationMCP, SystemPromptIncludesMcpTools) { + McpTestAgent agent; + bool connected = agent.connectMcpServer("windows", { + {"command", "uvx"}, + {"args", {"windows-mcp"}} + }); + ASSERT_TRUE(connected); + + // After MCP connect, rebuildSystemPrompt should include MCP tool descriptions + std::string prompt = agent.systemPrompt(); + EXPECT_NE(prompt.find("mcp_windows_Shell"), std::string::npos) + << "Expected system prompt to contain mcp_windows_Shell tool description"; +} + +#endif // _WIN32 diff --git a/cpp/tests/integration/test_integration_wifi.cpp b/cpp/tests/integration/test_integration_wifi.cpp new file mode 100644 index 000000000..bbaed9bcb --- /dev/null +++ b/cpp/tests/integration/test_integration_wifi.cpp @@ -0,0 +1,278 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Integration tests for Wi-Fi Troubleshooter Agent use-cases. +// Tests real PowerShell diagnostic tool execution + LLM reasoning. +// Only read-only diagnostic tools are registered (no fix tools). 
+// +// Requires: +// - Windows (PowerShell commands) +// - lemonade-server running with the test model loaded +// +// Env vars: +// GAIA_CPP_TEST_MODEL — model ID (default: Qwen3-4B-Instruct-2507-GGUF) +// GAIA_CPP_BASE_URL — LLM endpoint (default: http://localhost:8000/api/v1) + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifndef _WIN32 +// Skip entire file on non-Windows +TEST(IntegrationWiFi, SkipNonWindows) { + GTEST_SKIP() << "WiFi integration tests require Windows"; +} +#else + +// --------------------------------------------------------------------------- +// Helpers (same as wifi_agent.cpp) +// --------------------------------------------------------------------------- + +static std::string runShell(const std::string& command) { + std::string fullCmd = "powershell -NoProfile -NonInteractive -Command \"& { " + + command + " }\" 2>&1"; + + std::string result; + std::array buffer; + + struct PipeCloser { + void operator()(FILE* f) const { if (f) _pclose(f); } + }; + std::unique_ptr pipe(_popen(fullCmd.c_str(), "r")); + + if (!pipe) { + return "{\"error\": \"Failed to execute command\"}"; + } + + while (fgets(buffer.data(), static_cast(buffer.size()), pipe.get()) != nullptr) { + result += buffer.data(); + } + + return result.empty() ? 
"{\"status\": \"completed\", \"output\": \"(no output)\"}" : result; +} + +static bool isSafeShellArg(const std::string& arg) { + for (char c : arg) { + if (c == ';' || c == '|' || c == '&' || c == '`' || c == '$' + || c == '(' || c == ')' || c == '{' || c == '}' || c == '<' + || c == '>' || c == '"' || c == '\n' || c == '\r') { + return false; + } + } + return !arg.empty(); +} + +// --------------------------------------------------------------------------- +// Env var helpers +// --------------------------------------------------------------------------- + +static std::string testModel() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_TEST_MODEL"); + std::string result = env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_TEST_MODEL"); + return env ? std::string(env) : "Qwen3-4B-Instruct-2507-GGUF"; +#endif +} + +static std::string testBaseUrl() { +#ifdef _MSC_VER + char* env = nullptr; + size_t len = 0; + _dupenv_s(&env, &len, "GAIA_CPP_BASE_URL"); + std::string result = env ? std::string(env) : "http://localhost:8000/api/v1"; + free(env); + return result; +#else + const char* env = std::getenv("GAIA_CPP_BASE_URL"); + return env ? 
std::string(env) : "http://localhost:8000/api/v1"; +#endif +} + +static std::string toLower(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::tolower(c); }); + return s; +} + +static gaia::AgentConfig wifiTestConfig(int maxSteps = 3) { + gaia::AgentConfig cfg; + cfg.baseUrl = testBaseUrl(); + cfg.modelId = testModel(); + cfg.maxSteps = maxSteps; + cfg.silentMode = true; + return cfg; +} + +// --------------------------------------------------------------------------- +// WiFi Test Agent — registers same read-only diagnostic tools as wifi_agent +// --------------------------------------------------------------------------- + +class WiFiTestAgent : public gaia::Agent { +public: + int toolCallCount = 0; + std::string lastToolCalled; + + explicit WiFiTestAgent(int maxSteps = 3) + : Agent(wifiTestConfig(maxSteps)) { init(); } + +protected: + std::string getSystemPrompt() const override { + return R"(You are a network diagnostic assistant. Use the provided tools to answer questions about the network. +IMPORTANT: You MUST call the appropriate tool to get real data. Do not guess or make up network information. 
+When asked to check something specific, call the relevant tool, then summarize the result concisely.)"; + } + + void registerTools() override { + toolRegistry().registerTool( + "check_adapter", + "Show Wi-Fi adapter status including SSID, signal strength, radio type, and connection state.", + [this](const gaia::json& /*args*/) -> gaia::json { + ++toolCallCount; + lastToolCalled = "check_adapter"; + std::string cmd = "netsh wlan show interfaces"; + std::string output = runShell(cmd); + return {{"tool", "check_adapter"}, {"command", cmd}, {"output", output}}; + }, + {} + ); + + toolRegistry().registerTool( + "check_ip_config", + "Show full IP configuration for all network adapters including IP address, subnet mask, default gateway, DNS servers.", + [this](const gaia::json& /*args*/) -> gaia::json { + ++toolCallCount; + lastToolCalled = "check_ip_config"; + std::string cmd = "ipconfig /all"; + std::string output = runShell(cmd); + return {{"tool", "check_ip_config"}, {"command", cmd}, {"output", output}}; + }, + {} + ); + + toolRegistry().registerTool( + "test_dns_resolution", + "Test DNS resolution by resolving a hostname to an IP address. Returns JSON with resolved addresses.", + [this](const gaia::json& args) -> gaia::json { + ++toolCallCount; + lastToolCalled = "test_dns_resolution"; + std::string hostname = args.value("hostname", "google.com"); + if (!isSafeShellArg(hostname)) { + return {{"error", "Invalid hostname"}}; + } + std::string cmd = "Resolve-DnsName -Name " + hostname + + " -Type A -ErrorAction Stop | Select-Object Name, IPAddress, QueryType" + + " | ConvertTo-Json"; + std::string output = runShell(cmd); + return {{"tool", "test_dns_resolution"}, {"command", cmd}, {"hostname", hostname}, {"output", output}}; + }, + {{"hostname", gaia::ToolParamType::STRING, false, "Hostname to resolve (default: google.com)"}} + ); + + toolRegistry().registerTool( + "test_internet", + "Test internet connectivity by connecting to 8.8.8.8 on port 443. 
Returns JSON with connection status and latency.", + [this](const gaia::json& /*args*/) -> gaia::json { + ++toolCallCount; + lastToolCalled = "test_internet"; + std::string cmd = + "Test-NetConnection -ComputerName 8.8.8.8 -Port 443" + " | Select-Object ComputerName, RemotePort, TcpTestSucceeded, PingSucceeded," + " PingReplyDetails" + " | ConvertTo-Json"; + std::string output = runShell(cmd); + return {{"tool", "test_internet"}, {"command", cmd}, {"output", output}}; + }, + {} + ); + + toolRegistry().registerTool( + "ping_host", + "Ping a specific host and return connection status, latency, and resolved address as JSON.", + [this](const gaia::json& args) -> gaia::json { + ++toolCallCount; + lastToolCalled = "ping_host"; + std::string host = args.value("host", ""); + if (host.empty()) { + return {{"error", "host parameter is required"}}; + } + if (!isSafeShellArg(host)) { + return {{"error", "Invalid host"}}; + } + std::string cmd = + "Test-NetConnection -ComputerName " + host + + " | Select-Object ComputerName, RemoteAddress, PingSucceeded, PingReplyDetails" + + " | ConvertTo-Json"; + std::string output = runShell(cmd); + return {{"tool", "ping_host"}, {"command", cmd}, {"host", host}, {"output", output}}; + }, + {{"host", gaia::ToolParamType::STRING, true, "Hostname or IP to ping"}} + ); + } +}; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +TEST(IntegrationWiFi, CheckAdapter) { + WiFiTestAgent agent; + auto result = agent.processQuery("Check the Wi-Fi adapter status. 
What SSID am I connected to and what is the signal strength?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + EXPECT_GT(agent.toolCallCount, 0) << "Expected at least one tool call"; +} + +TEST(IntegrationWiFi, IpConfig) { + WiFiTestAgent agent; + auto result = agent.processQuery("Show my IP configuration. What is my IP address and default gateway?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + EXPECT_GT(agent.toolCallCount, 0) << "Expected at least one tool call"; +} + +TEST(IntegrationWiFi, DnsResolution) { + WiFiTestAgent agent; + auto result = agent.processQuery("Test DNS resolution for google.com. Does it resolve successfully?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + EXPECT_GT(agent.toolCallCount, 0) << "Expected at least one tool call"; + // DNS should resolve to an IP — look for dotted-quad pattern or "google" + std::string lower = toLower(answer); + bool hasDnsInfo = lower.find("google") != std::string::npos + || lower.find("resolve") != std::string::npos + || lower.find("ip") != std::string::npos + || lower.find(".") != std::string::npos; + EXPECT_TRUE(hasDnsInfo) << "Expected DNS resolution info in answer, got: " << answer; +} + +TEST(IntegrationWiFi, InternetConnectivity) { + WiFiTestAgent agent; + auto result = agent.processQuery("Test internet connectivity. 
Can I reach external servers?"); + + ASSERT_TRUE(result.contains("result")); + std::string answer = result["result"].get(); + EXPECT_FALSE(answer.empty()) << "Expected non-empty response"; + EXPECT_GT(agent.toolCallCount, 0) << "Expected at least one tool call"; +} + +#endif // _WIN32 diff --git a/cpp/tests/integration/test_main.cpp b/cpp/tests/integration/test_main.cpp new file mode 100644 index 000000000..1a545e5bd --- /dev/null +++ b/cpp/tests/integration/test_main.cpp @@ -0,0 +1,237 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Custom main() for GAIA C++ integration tests. +// Provides an interactive menu and CLI flags to select test sections. +// +// Interactive menu (no args): +// tests_integration.exe +// +// CLI flags (for CI / AI assistants): +// tests_integration.exe --llm +// tests_integration.exe --mcp +// tests_integration.exe --wifi +// tests_integration.exe --health +// tests_integration.exe --all +// tests_integration.exe --model Qwen3-4B-Instruct-2507-GGUF +// tests_integration.exe --url http://localhost:8000/api/v1 +// +// GTest passthrough (suppresses menu): +// tests_integration.exe --gtest_filter=IntegrationMCP* +// tests_integration.exe --gtest_list_tests + +#include +#include + +#include +#include +#include +#include +#include + +// Reuse gaia::color from clean_console.h — same colors as wifi/health agents +namespace color = gaia::color; + +// --------------------------------------------------------------------------- +// GTest filter patterns for each section +// --------------------------------------------------------------------------- +static const char* kFilterLLM = "LLMIntegrationTest.*"; +static const char* kFilterMCP = "IntegrationMCP.*"; +static const char* kFilterWiFi = "IntegrationWiFi.*"; +static const char* kFilterHealth = "IntegrationHealth*.*"; +static const char* kFilterAll = "*"; + +// 
--------------------------------------------------------------------------- +// Menu items — same pattern as wifi_agent kDiagnosticMenu / health_agent kHealthMenu +// Each entry: { label, description, gtest filter } +// --------------------------------------------------------------------------- +struct MenuItem { + const char* label; + const char* description; + const char* filter; +}; + +static const MenuItem kTestMenu[] = { + {"LLM tests", "basic chat, tool calling, multi-step, system prompt (5 tests)", kFilterLLM}, + {"MCP tests", "connection, tool discovery, reconnect, prompt rebuild (5 tests)", kFilterMCP}, + {"WiFi tests", "real PowerShell diagnostics + LLM reasoning (4 tests)", kFilterWiFi}, + {"Health tests", "LLM + MCP + real PowerShell system health (3 tests)", kFilterHealth}, + {"All tests", "run everything (17 tests)", kFilterAll}, +}; +static constexpr size_t kMenuSize = sizeof(kTestMenu) / sizeof(kTestMenu[0]); + +// --------------------------------------------------------------------------- +// Windows-safe setenv (works with both MSVC and MinGW) +// --------------------------------------------------------------------------- +static void setEnvVar(const char* name, const std::string& value) { +#ifdef _WIN32 + _putenv_s(name, value.c_str()); +#else + setenv(name, value.c_str(), 1); +#endif +} + +// --------------------------------------------------------------------------- +// Check if any --gtest_* flags are present (suppress menu) +// --------------------------------------------------------------------------- +static bool hasGtestFlags(int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + std::string arg(argv[i]); + if (arg.find("--gtest_") == 0) return true; + } + return false; +} + +// --------------------------------------------------------------------------- +// CLI parsing +// --------------------------------------------------------------------------- +struct CliOptions { + bool llm = false; + bool mcp = false; + bool wifi = false; + 
bool health = false; + bool all = false; + std::string model; + std::string url; + bool hasSection = false; +}; + +static CliOptions parseCli(int argc, char** argv) { + CliOptions opts; + for (int i = 1; i < argc; ++i) { + std::string arg(argv[i]); + if (arg == "--llm") { opts.llm = true; opts.hasSection = true; } + else if (arg == "--mcp") { opts.mcp = true; opts.hasSection = true; } + else if (arg == "--wifi") { opts.wifi = true; opts.hasSection = true; } + else if (arg == "--health") { opts.health = true; opts.hasSection = true; } + else if (arg == "--all") { opts.all = true; opts.hasSection = true; } + else if (arg == "--model" && i + 1 < argc) { opts.model = argv[++i]; } + else if (arg == "--url" && i + 1 < argc) { opts.url = argv[++i]; } + } + return opts; +} + +// --------------------------------------------------------------------------- +// Build filter from multiple sections: "Suite1.*:Suite2.*" +// --------------------------------------------------------------------------- +static std::string buildFilter(const CliOptions& opts) { + if (opts.all) return kFilterAll; + + std::vector filters; + if (opts.llm) filters.push_back(kFilterLLM); + if (opts.mcp) filters.push_back(kFilterMCP); + if (opts.wifi) filters.push_back(kFilterWiFi); + if (opts.health) filters.push_back(kFilterHealth); + + if (filters.empty()) return kFilterAll; + + std::string combined; + for (size_t i = 0; i < filters.size(); ++i) { + if (i > 0) combined += ":"; + combined += filters[i]; + } + return combined; +} + +// --------------------------------------------------------------------------- +// Interactive menu — same visual style as wifi_agent / health_agent +// --------------------------------------------------------------------------- +static std::string showMenu() { + // Banner + std::cout << std::endl; + std::cout << color::CYAN << color::BOLD + << " ========================================================================================" + << color::RESET << std::endl; + std::cout 
<< color::CYAN << color::BOLD + << " Integration Tests | GAIA C++ Agent Framework | LLM + MCP + WiFi + Health" + << color::RESET << std::endl; + std::cout << color::CYAN << color::BOLD + << " ========================================================================================" + << color::RESET << std::endl; + std::cout << std::endl; + + // Menu items — same layout as printDiagnosticMenu() / printHealthMenu() + std::cout << color::CYAN + << " ========================================================================================" + << color::RESET << std::endl; + for (size_t i = 0; i < kMenuSize; ++i) { + size_t num = i + 1; + std::cout << color::YELLOW << " [" << num << "] " + << color::RESET << color::WHITE + << kTestMenu[i].label + << color::RESET << color::GRAY + << " - " << kTestMenu[i].description + << color::RESET << std::endl; + } + std::cout << color::CYAN + << " ========================================================================================" + << color::RESET << std::endl; + std::cout << color::GRAY + << " CLI: --llm --mcp --wifi --health --all | --model --url " + << color::RESET << std::endl; + std::cout << std::endl; + std::cout << color::BOLD << " > " << color::RESET << std::flush; + + std::string input; + if (!std::getline(std::cin, input)) return kFilterAll; + + // Map selection to filter + bool isNumber = !input.empty() && + std::all_of(input.begin(), input.end(), + [](unsigned char c) { return std::isdigit(c); }); + if (isNumber) { + int choice = 0; + try { choice = std::stoi(input); } + catch (...) { choice = -1; } + if (choice >= 1 && choice <= static_cast(kMenuSize)) { + size_t idx = static_cast(choice - 1); + std::cout << color::CYAN << " > " + << kTestMenu[idx].label + << color::RESET << std::endl; + return kTestMenu[idx].filter; + } + std::cout << color::RED << " Invalid selection. Running all tests." 
+ << color::RESET << std::endl; + } + + return kFilterAll; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + // Parse our custom flags (after GTest consumes --gtest_*) + CliOptions opts = parseCli(argc, argv); + + // Set env vars from CLI if provided + if (!opts.model.empty()) { + setEnvVar("GAIA_CPP_TEST_MODEL", opts.model); + } + if (!opts.url.empty()) { + setEnvVar("GAIA_CPP_BASE_URL", opts.url); + } + + // Determine filter + std::string filter; + + if (hasGtestFlags(argc, argv)) { + // User passed --gtest_filter or --gtest_list_tests — don't override + filter = ""; + } else if (opts.hasSection) { + // CLI flags: --llm, --mcp, etc. + filter = buildFilter(opts); + } else { + // Interactive menu + filter = showMenu(); + } + + // Apply filter (only if we chose one — skip if user passed --gtest_filter) + if (!filter.empty()) { + ::testing::GTEST_FLAG(filter) = filter; + } + + return RUN_ALL_TESTS(); +} diff --git a/cpp/tests/test_clean_console.cpp b/cpp/tests/test_clean_console.cpp new file mode 100644 index 000000000..d56acc00c --- /dev/null +++ b/cpp/tests/test_clean_console.cpp @@ -0,0 +1,731 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include +#include + +#include + +using namespace gaia; + +// --------------------------------------------------------------------------- +// Helper RAII guard to redirect std::cout into an ostringstream and restore +// the original streambuf on scope exit (even if a test assertion fails). 
+// --------------------------------------------------------------------------- +class CoutCapture { +public: + CoutCapture() : captured_(), oldBuf_(std::cout.rdbuf(captured_.rdbuf())) {} + ~CoutCapture() { std::cout.rdbuf(oldBuf_); } + + std::string str() const { return captured_.str(); } + +private: + std::ostringstream captured_; + std::streambuf* oldBuf_; +}; + +// ---- 1. printProcessingStart resets internal state ---- + +TEST(CleanConsoleTest, PrintProcessingStartResetsState) { + CleanConsole console; + + // Advance internal state: set a goal, mark plan as shown, run a tool + { + CoutCapture cap; + console.printGoal("initial goal"); + console.printPlan(json::array({{{"tool", "t1"}}}), 0); + console.printToolComplete(); + } + + // Reset via printProcessingStart + { + CoutCapture cap; + console.printProcessingStart("query", 10, "model"); + } + + // After reset, the same goal should print again (lastGoal_ cleared) + { + CoutCapture cap; + console.printGoal("initial goal"); + std::string out = cap.str(); + EXPECT_TRUE(out.find("Goal:") != std::string::npos) + << "Goal should appear again after reset; got: " << out; + } + + // After reset, plan should print again (planShown_ cleared) + { + CoutCapture cap; + console.printPlan(json::array({{{"tool", "t1"}}}), 0); + std::string out = cap.str(); + EXPECT_TRUE(out.find("Plan:") != std::string::npos) + << "Plan should appear again after reset; got: " << out; + } + + // After reset, toolsRun_ should be 0 so printThought uses "Thinking:" + { + CoutCapture cap; + console.printThought("some thought"); + std::string out = cap.str(); + EXPECT_TRUE(out.find("Thinking:") != std::string::npos) + << "Should use Thinking label after reset; got: " << out; + } +} + +// ---- 2. 
printThought with FINDING and DECISION ---- + +TEST(CleanConsoleTest, PrintThoughtFindingAndDecision) { + CleanConsole console; + CoutCapture cap; + + console.printThought("FINDING: The network is down DECISION: Restart the router"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Finding:") != std::string::npos) + << "Expected Finding: label; got: " << out; + EXPECT_TRUE(out.find("Decision:") != std::string::npos) + << "Expected Decision: label; got: " << out; + EXPECT_TRUE(out.find("network is down") != std::string::npos) + << "Expected finding content; got: " << out; + EXPECT_TRUE(out.find("Restart the router") != std::string::npos) + << "Expected decision content; got: " << out; +} + +// ---- 3. printThought with FINDING only ---- + +TEST(CleanConsoleTest, PrintThoughtFindingOnly) { + CleanConsole console; + CoutCapture cap; + + console.printThought("FINDING: The disk is full"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Finding:") != std::string::npos) + << "Expected Finding: label; got: " << out; + EXPECT_TRUE(out.find("disk is full") != std::string::npos) + << "Expected finding content; got: " << out; + // No Decision label should appear + EXPECT_TRUE(out.find("Decision:") == std::string::npos) + << "Decision: should NOT appear; got: " << out; +} + +// ---- 4. printThought with DECISION only ---- + +TEST(CleanConsoleTest, PrintThoughtDecisionOnly) { + CleanConsole console; + CoutCapture cap; + + console.printThought("DECISION: Allocate more memory"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Decision:") != std::string::npos) + << "Expected Decision: label; got: " << out; + EXPECT_TRUE(out.find("Allocate more memory") != std::string::npos) + << "Expected decision content; got: " << out; + // No Finding label should appear + EXPECT_TRUE(out.find("Finding:") == std::string::npos) + << "Finding: should NOT appear; got: " << out; +} + +// ---- 5. 
printThought fallback (no markers) ---- + +TEST(CleanConsoleTest, PrintThoughtFallbackNoToolsRun) { + CleanConsole console; + CoutCapture cap; + + console.printThought("I need to check something"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Thinking:") != std::string::npos) + << "Expected Thinking: label when toolsRun_==0; got: " << out; + EXPECT_TRUE(out.find("I need to check something") != std::string::npos) + << "Expected thought content; got: " << out; +} + +// ---- 6. printThought fallback after tool ---- + +TEST(CleanConsoleTest, PrintThoughtFallbackAfterTool) { + CleanConsole console; + + // Simulate a tool having completed + { + CoutCapture cap; + console.printToolComplete(); + } + + CoutCapture cap; + console.printThought("Interpreting results"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Analysis:") != std::string::npos) + << "Expected Analysis: label when toolsRun_>0; got: " << out; + EXPECT_TRUE(out.find("Interpreting results") != std::string::npos) + << "Expected thought content; got: " << out; +} + +// ---- 7. printThought empty ---- + +TEST(CleanConsoleTest, PrintThoughtEmpty) { + CleanConsole console; + CoutCapture cap; + + console.printThought(""); + + EXPECT_TRUE(cap.str().empty()) + << "Empty thought should produce no output; got: " << cap.str(); +} + +// ---- 8. printGoal ---- + +TEST(CleanConsoleTest, PrintGoal) { + CleanConsole console; + CoutCapture cap; + + console.printGoal("Diagnose the network issue"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Goal:") != std::string::npos) + << "Expected Goal: label; got: " << out; + EXPECT_TRUE(out.find("Diagnose the network issue") != std::string::npos) + << "Expected goal text; got: " << out; +} + +// ---- 9. 
printGoal dedup ---- + +TEST(CleanConsoleTest, PrintGoalDedup) { + CleanConsole console; + + // First call: should produce output + { + CoutCapture cap; + console.printGoal("Repeated goal"); + std::string out = cap.str(); + EXPECT_TRUE(out.find("Goal:") != std::string::npos) + << "First call should show goal; got: " << out; + } + + // Second call with same text: should produce no output + { + CoutCapture cap; + console.printGoal("Repeated goal"); + EXPECT_TRUE(cap.str().empty()) + << "Duplicate goal should produce no output; got: " << cap.str(); + } +} + +// ---- 10. printToolUsage ---- + +TEST(CleanConsoleTest, PrintToolUsage) { + CleanConsole console; + + // Set step/limit so the output includes them + console.printStepHeader(2, 5); + + CoutCapture cap; + console.printToolUsage("run_command"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("2") != std::string::npos) + << "Expected step number 2; got: " << out; + EXPECT_TRUE(out.find("5") != std::string::npos) + << "Expected step limit 5; got: " << out; + EXPECT_TRUE(out.find("run_command") != std::string::npos) + << "Expected tool name; got: " << out; +} + +// ---- 11. printToolComplete increments toolsRun_ ---- + +TEST(CleanConsoleTest, PrintToolCompleteIncrementsToolsRun) { + CleanConsole console; + + // Before any tool completion, thought label should be "Thinking:" + { + CoutCapture cap; + console.printThought("before"); + EXPECT_TRUE(cap.str().find("Thinking:") != std::string::npos) + << "Expected Thinking: before tool; got: " << cap.str(); + } + + // Complete a tool + console.printToolComplete(); + + // After tool completion, thought label should switch to "Analysis:" + { + CoutCapture cap; + console.printThought("after"); + EXPECT_TRUE(cap.str().find("Analysis:") != std::string::npos) + << "Expected Analysis: after tool; got: " << cap.str(); + } +} + +// ---- 12. 
prettyPrintJson Tool Args ---- + +TEST(CleanConsoleTest, PrettyPrintJsonToolArgs) { + CleanConsole console; + CoutCapture cap; + + json args = {{"path", "/tmp/test"}, {"recursive", true}}; + console.prettyPrintJson(args, "Tool Args"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Args:") != std::string::npos) + << "Expected Args: label; got: " << out; + EXPECT_TRUE(out.find("path") != std::string::npos) + << "Expected key 'path'; got: " << out; + EXPECT_TRUE(out.find("/tmp/test") != std::string::npos) + << "Expected value '/tmp/test'; got: " << out; +} + +// ---- 13. prettyPrintJson Tool Result with output ---- + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultOutput) { + CleanConsole console; + CoutCapture cap; + + json result = {{"output", "Hello World"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Output:") != std::string::npos) + << "Expected Output: label; got: " << out; + // The preview box uses bordered output + EXPECT_TRUE(out.find("Hello World") != std::string::npos) + << "Expected output content in preview; got: " << out; + // Verify the preview box borders + EXPECT_TRUE(out.find(".---") != std::string::npos) + << "Expected top border of preview box; got: " << out; + EXPECT_TRUE(out.find("'---") != std::string::npos) + << "Expected bottom border of preview box; got: " << out; +} + +// ---- 14. prettyPrintJson Tool Result with error ---- + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultError) { + CleanConsole console; + CoutCapture cap; + + json result = {{"error", "File not found"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Error:") != std::string::npos) + << "Expected Error: label; got: " << out; + EXPECT_TRUE(out.find("File not found") != std::string::npos) + << "Expected error message; got: " << out; +} + +// ---- 15. 
prettyPrintJson Tool Result with command ---- + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultCommand) { + CleanConsole console; + CoutCapture cap; + + json result = {{"command", "ipconfig /all"}, {"output", "Windows IP Configuration"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Cmd:") != std::string::npos) + << "Expected Cmd: label; got: " << out; + EXPECT_TRUE(out.find("ipconfig /all") != std::string::npos) + << "Expected command text; got: " << out; +} + +// ---- 16. printError ---- + +TEST(CleanConsoleTest, PrintError) { + CleanConsole console; + CoutCapture cap; + + console.printError("something failed"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("ERROR:") != std::string::npos) + << "Expected ERROR: label; got: " << out; + EXPECT_TRUE(out.find("something failed") != std::string::npos) + << "Expected error message; got: " << out; +} + +// ---- 17. printWarning ---- + +TEST(CleanConsoleTest, PrintWarning) { + CleanConsole console; + CoutCapture cap; + + console.printWarning("disk space low"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("WARNING:") != std::string::npos) + << "Expected WARNING: label; got: " << out; + EXPECT_TRUE(out.find("disk space low") != std::string::npos) + << "Expected warning message; got: " << out; +} + +// ---- 18. printFinalAnswer ---- + +TEST(CleanConsoleTest, PrintFinalAnswer) { + CleanConsole console; + CoutCapture cap; + + console.printFinalAnswer("The answer is 42"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Conclusion") != std::string::npos) + << "Expected Conclusion label; got: " << out; + EXPECT_TRUE(out.find("The answer is 42") != std::string::npos) + << "Expected answer text; got: " << out; + // Verify bordered section (=== lines) + EXPECT_TRUE(out.find("====") != std::string::npos) + << "Expected border lines; got: " << out; +} + +// ---- 19. 
printFinalAnswer JSON extraction ---- + +TEST(CleanConsoleTest, PrintFinalAnswerJsonExtraction) { + CleanConsole console; + CoutCapture cap; + + // The LLM sometimes returns raw JSON; CleanConsole should extract the "answer" key + console.printFinalAnswer(R"({"answer": "Extracted value", "confidence": 0.95})"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Extracted value") != std::string::npos) + << "Expected extracted answer value; got: " << out; + EXPECT_TRUE(out.find("Conclusion") != std::string::npos) + << "Expected Conclusion label; got: " << out; +} + +// ---- 20. printFinalAnswer empty ---- + +TEST(CleanConsoleTest, PrintFinalAnswerEmpty) { + CleanConsole console; + CoutCapture cap; + + console.printFinalAnswer(""); + + EXPECT_TRUE(cap.str().empty()) + << "Empty final answer should produce no output; got: " << cap.str(); +} + +// ---- 21. printCompletion ---- + +TEST(CleanConsoleTest, PrintCompletion) { + CleanConsole console; + CoutCapture cap; + + console.printCompletion(3, 10); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("3") != std::string::npos) + << "Expected step count 3; got: " << out; + EXPECT_TRUE(out.find("steps") != std::string::npos) + << "Expected 'steps' text; got: " << out; +} + +// ---- 22. 
printPlan ---- + +TEST(CleanConsoleTest, PrintPlan) { + CleanConsole console; + CoutCapture cap; + + json plan = json::array({ + {{"tool", "diagnose_wifi"}}, + {{"tool", "run_command"}}, + {{"tool", "check_status"}} + }); + console.printPlan(plan, 0); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Plan:") != std::string::npos) + << "Expected Plan: label; got: " << out; + EXPECT_TRUE(out.find("diagnose_wifi") != std::string::npos) + << "Expected tool name diagnose_wifi; got: " << out; + EXPECT_TRUE(out.find("run_command") != std::string::npos) + << "Expected tool name run_command; got: " << out; + EXPECT_TRUE(out.find("check_status") != std::string::npos) + << "Expected tool name check_status; got: " << out; +} + +// ---- 23. printPlan shown once ---- + +TEST(CleanConsoleTest, PrintPlanShownOnce) { + CleanConsole console; + + json plan = json::array({{{"tool", "step1"}}}); + + // First call: should produce output + { + CoutCapture cap; + console.printPlan(plan, 0); + EXPECT_TRUE(cap.str().find("Plan:") != std::string::npos) + << "First call should show plan; got: " << cap.str(); + } + + // Second call: should produce no output (planShown_ is true) + { + CoutCapture cap; + console.printPlan(plan, 1); + EXPECT_TRUE(cap.str().empty()) + << "Second plan call should produce no output; got: " << cap.str(); + } +} + +// ---- 24. printWrapped word-wrap ---- + +TEST(CleanConsoleTest, PrintWrappedWordWrap) { + CleanConsole console; + + // Build a long thought that will exceed the wrap width (78 chars for fallback). + // Each word is ~10 chars; 12 words = ~130 chars + spaces => will wrap. 
+ std::string longText; + for (int i = 0; i < 12; ++i) { + if (i > 0) longText += " "; + longText += "LongWord" + std::to_string(i) + "X"; + } + + CoutCapture cap; + console.printThought(longText); + + std::string out = cap.str(); + // Count newlines -- word-wrapping should produce at least 2 lines + int newlines = 0; + for (char c : out) { + if (c == '\n') ++newlines; + } + EXPECT_GE(newlines, 2) + << "Expected wrapped output with multiple lines; got " << newlines + << " newlines in: " << out; +} + +// ---- 25. printStyledWord bold ---- + +TEST(CleanConsoleTest, PrintStyledWordBold) { + CleanConsole console; + CoutCapture cap; + + // Pass text with **bold** markers through printThought (which calls printWrapped -> printStyledWord) + console.printThought("This is **important** information"); + + std::string out = cap.str(); + // The BOLD ANSI code should appear + EXPECT_TRUE(out.find("\033[1m") != std::string::npos) + << "Expected ANSI bold code; got: " << out; + // The WHITE ANSI code should appear (used for bold text) + EXPECT_TRUE(out.find("\033[97m") != std::string::npos) + << "Expected ANSI white code for bold text; got: " << out; + // The actual word "important" should appear (without the ** markers) + EXPECT_TRUE(out.find("important") != std::string::npos) + << "Expected bold content 'important'; got: " << out; + // The ** markers should NOT appear literally in the output + // (they are consumed by printStyledWord and replaced with ANSI codes) + // Note: we check that "**important**" as a literal substring is absent, + // but "important" surrounded by ANSI codes is present. +} + +// ---- 26. 
printOutputPreview truncation ---- + +TEST(CleanConsoleTest, PrintOutputPreviewTruncation) { + CleanConsole console; + + // Build output with 15 non-empty lines (exceeds kMaxPreviewLines = 10) + std::string multiLineOutput; + for (int i = 1; i <= 15; ++i) { + multiLineOutput += "Line number " + std::to_string(i) + "\n"; + } + + CoutCapture cap; + + json result = {{"output", multiLineOutput}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + // The first 10 lines should appear + EXPECT_TRUE(out.find("Line number 1") != std::string::npos) + << "Expected first line in preview; got: " << out; + EXPECT_TRUE(out.find("Line number 10") != std::string::npos) + << "Expected 10th line in preview; got: " << out; + // Lines beyond 10 should not appear directly + EXPECT_TRUE(out.find("Line number 11") == std::string::npos) + << "Line 11 should NOT appear in preview; got: " << out; + // The "more lines" message should appear + EXPECT_TRUE(out.find("more lines") != std::string::npos) + << "Expected 'more lines' truncation message; got: " << out; + // Specifically 5 more lines (15 - 10 = 5) + EXPECT_TRUE(out.find("5 more lines") != std::string::npos) + << "Expected '5 more lines'; got: " << out; +} + +// ---- Additional edge-case tests ---- + +TEST(CleanConsoleTest, PrintGoalEmptyString) { + CleanConsole console; + CoutCapture cap; + + console.printGoal(""); + + EXPECT_TRUE(cap.str().empty()) + << "Empty goal should produce no output; got: " << cap.str(); +} + +TEST(CleanConsoleTest, PrintPlanNonArray) { + CleanConsole console; + CoutCapture cap; + + // Non-array JSON should be ignored + console.printPlan(json::object({{"tool", "t1"}}), 0); + + EXPECT_TRUE(cap.str().empty()) + << "Non-array plan should produce no output; got: " << cap.str(); +} + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultNoOutput) { + CleanConsole console; + CoutCapture cap; + + // Tool result with "(no output)" should show "Result: (no output)" + json result = 
{{"output", "(no output)"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Result:") != std::string::npos) + << "Expected Result: label for no-output case; got: " << out; + EXPECT_TRUE(out.find("(no output)") != std::string::npos) + << "Expected '(no output)' text; got: " << out; +} + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultEmptyOutput) { + CleanConsole console; + CoutCapture cap; + + // Empty output string should show "(no output)" + json result = {{"output", ""}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("(no output)") != std::string::npos) + << "Expected '(no output)' for empty output; got: " << out; +} + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultStatus) { + CleanConsole console; + CoutCapture cap; + + json result = {{"status", "completed"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Status:") != std::string::npos) + << "Expected Status: label; got: " << out; + EXPECT_TRUE(out.find("completed") != std::string::npos) + << "Expected status value; got: " << out; +} + +TEST(CleanConsoleTest, PrintFinalAnswerJsonThoughtExtraction) { + CleanConsole console; + CoutCapture cap; + + // When JSON has "thought" key but no "answer" key, it should extract "thought" + console.printFinalAnswer(R"({"thought": "Let me explain this"})"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Let me explain this") != std::string::npos) + << "Expected extracted thought value; got: " << out; +} + +TEST(CleanConsoleTest, PrintFinalAnswerInvalidJson) { + CleanConsole console; + CoutCapture cap; + + // Starts with '{' but is not valid JSON -- should use as-is + console.printFinalAnswer("{not valid json at all"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("{not valid json at all") != std::string::npos) + << "Invalid JSON should be printed as-is; got: " << out; + 
EXPECT_TRUE(out.find("Conclusion") != std::string::npos) + << "Expected Conclusion label; got: " << out; +} + +TEST(CleanConsoleTest, PrintThoughtCaseInsensitiveMarkers) { + // The code checks both "FINDING:" and "Finding:" (also "DECISION:"/"Decision:") + CleanConsole console; + CoutCapture cap; + + console.printThought("Finding: lowercase marker Decision: also lowercase"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Finding:") != std::string::npos) + << "Expected Finding: label for lowercase marker; got: " << out; + EXPECT_TRUE(out.find("Decision:") != std::string::npos) + << "Expected Decision: label for lowercase marker; got: " << out; +} + +TEST(CleanConsoleTest, NoOpMethodsDoNotCrash) { + // printStateInfo, printInfo, startProgress, stopProgress are no-ops + CleanConsole console; + CoutCapture cap; + + console.printStateInfo("state info"); + console.printInfo("info message"); + console.startProgress("loading..."); + console.stopProgress(); + + // These are no-op methods; verify they produce no output and do not crash + EXPECT_TRUE(cap.str().empty()) + << "No-op methods should produce no output; got: " << cap.str(); +} + +TEST(CleanConsoleTest, PrettyPrintJsonToolArgsEmpty) { + CleanConsole console; + CoutCapture cap; + + // Empty object should produce no output for Tool Args + console.prettyPrintJson(json::object(), "Tool Args"); + + EXPECT_TRUE(cap.str().empty()) + << "Empty Tool Args should produce no output; got: " << cap.str(); +} + +TEST(CleanConsoleTest, PrettyPrintJsonNonToolTitle) { + CleanConsole console; + CoutCapture cap; + + // A title that is neither "Tool Args" nor "Tool Result" should produce no output + json data = {{"key", "value"}}; + console.prettyPrintJson(data, "Something Else"); + + EXPECT_TRUE(cap.str().empty()) + << "Non-tool title should produce no output; got: " << cap.str(); +} + +TEST(CleanConsoleTest, PrintToolUsageStoresLastToolName) { + CleanConsole console; + console.printStepHeader(1, 5); + + { + CoutCapture cap; 
+ console.printToolUsage("my_tool"); + std::string out = cap.str(); + EXPECT_TRUE(out.find("my_tool") != std::string::npos) + << "Expected tool name; got: " << out; + EXPECT_TRUE(out.find("[1/5]") != std::string::npos) + << "Expected step format [1/5]; got: " << out; + } +} + +TEST(CleanConsoleTest, PrettyPrintJsonToolResultErrorPreventsOutput) { + CleanConsole console; + CoutCapture cap; + + // When error is present alongside output, error should be shown and + // we should NOT see the Output: preview (error causes early return) + json result = {{"error", "Permission denied"}, {"output", "should not show"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Error:") != std::string::npos) + << "Expected Error: label; got: " << out; + EXPECT_TRUE(out.find("Permission denied") != std::string::npos) + << "Expected error message; got: " << out; + // The Output: section should not appear because error causes early return + EXPECT_TRUE(out.find("Output:") == std::string::npos) + << "Output: should NOT appear when error is present; got: " << out; +} diff --git a/cpp/tests/test_tool_integration.cpp b/cpp/tests/test_tool_integration.cpp new file mode 100644 index 000000000..c9be6b438 --- /dev/null +++ b/cpp/tests/test_tool_integration.cpp @@ -0,0 +1,986 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Integration tests for WiFi Agent tool registration/execution and +// Health Agent PowerShell output parsing. All mock data is anonymized +// (see ANONYMIZATION section below). No real shell commands are executed. 
+ +#include +#include +#include + +#include + +using namespace gaia; + +// =================================================================== +// Helper: create a ToolParameter (C++17 aggregate workaround) +// =================================================================== +static ToolParameter makeParam(const std::string& name, ToolParamType type, + bool required, const std::string& desc = "") { + ToolParameter p; + p.name = name; + p.type = type; + p.required = required; + p.description = desc; + return p; +} + +// =================================================================== +// RAII stdout capture (matches test_clean_console.cpp pattern) +// =================================================================== +class CoutCapture { +public: + CoutCapture() : captured_(), oldBuf_(std::cout.rdbuf(captured_.rdbuf())) {} + ~CoutCapture() { std::cout.rdbuf(oldBuf_); } + std::string str() const { return captured_.str(); } +private: + std::ostringstream captured_; + std::streambuf* oldBuf_; +}; + +// =================================================================== +// ANONYMIZED MOCK DATA +// +// Hostnames -> TESTPC-001 +// MAC addresses -> AA:BB:CC:DD:EE:01 / AA:BB:CC:DD:EE:02 +// Local IPs -> 10.0.0.100 (host), 10.0.0.1 (gateway) +// User paths -> C:\Users\testuser\ +// SIDs -> S-1-5-21-000000000-000000000-000000000-1001 +// Process IDs -> 1000, 2000, ... 
+// GUIDs -> {00000000-1111-2222-3333-444444444444} +// =================================================================== + +// ----- WiFi agent mock outputs ----- + +static const char* kMockAdapterOutput = R"( +There is 1 interface on the system: + + Name : Wi-Fi + Description : RZ717 WiFi 7 160MHz + GUID : {00000000-1111-2222-3333-444444444444} + Physical address : AA:BB:CC:DD:EE:01 + Interface type : Primary + State : disconnected + Radio status : Hardware On + Software On +)"; + +static const char* kMockDriverOutput = R"( +Interface name: Wi-Fi + + Driver : RZ717 WiFi 7 160MHz + Vendor : MediaTek, Inc. + Provider : MediaTek, Inc. + Date : 3/18/2025 + Version : 5.5.0.3548 + INF file : oem17.inf + Type : Native Wi-Fi Driver + Radio types supported : 802.11b 802.11a 802.11g 802.11n 802.11ac 802.11ax 802.11be + FIPS 140 mode supported : Yes + 802.11w Management Frame Protection supported : Yes + Hosted network supported : No +)"; + +static const char* kMockIpConfigOutput = R"( +Windows IP Configuration + + Host Name . . . . . . . . . . . . : TESTPC-001 + Primary Dns Suffix . . . . . . . : + Node Type . . . . . . . . . . . . : Hybrid + IP Routing Enabled. . . . . . . . : No + +Ethernet adapter Ethernet 2: + + Description . . . . . . . . . . . : Realtek Gaming 2.5GbE Family Controller + Physical Address. . . . . . . . . : AA-BB-CC-DD-EE-02 + DHCP Enabled. . . . . . . . . . . : Yes + IPv4 Address. . . . . . . . . . . : 10.0.0.100(Preferred) + Subnet Mask . . . . . . . . . . . : 255.255.255.0 + Default Gateway . . . . . . . . . : 10.0.0.1 + DNS Servers . . . . . . . . . . . 
: 10.0.0.1 +)"; + +static const char* kMockDnsOutput = R"({"Name":"google.com","IPAddress":"142.251.33.206","QueryType":1})"; + +static const char* kMockInternetOutput = R"({"ComputerName":"8.8.8.8","RemotePort":443,"TcpTestSucceeded":true,"PingSucceeded":false,"PingReplyDetails":null})"; + +static const char* kMockPingOutput = R"({"ComputerName":"10.0.0.1","RemoteAddress":"10.0.0.1","PingSucceeded":true,"PingReplyDetails":{"Address":"10.0.0.1","RoundtripTime":1,"Status":"Success"}})"; + +// ----- Health agent mock JSON outputs ----- + +static const char* kMockMemoryJson = R"({"TotalGB": 63.65, "FreeGB": 22.56})"; +static const char* kMockDiskJson = R"({"Name": "C", "UsedGB": 1312.44, "FreeGB": 103.34})"; +static const char* kMockCpuJson = R"({"Name": "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S", "LoadPercentage": 6, "NumberOfCores": 16})"; +static const char* kMockBatteryJson = R"({})"; +static const char* kMockStorageJson = R"({"FriendlyName": "PHISON ESR02TBYCCA4-EDJ-2", "MediaType": "SSD", "SizeGB": 1907.7, "HealthStatus": "Healthy", "OperationalStatus": "OK"})"; + +static const char* kMockGpuJson = R"JSON([ + {"Name":"USB Mobile Monitor Virtual Display","AdapterRAM":null,"DriverVersion":"2.0.0.1","VideoProcessor":null}, + {"Name":"AMD Radeon(TM) 8060S Graphics","AdapterRAM":4293918720,"DriverVersion":"32.0.23027.2005","VideoProcessor":"AMD Radeon Graphics Processor (0x1586)"} +])JSON"; + +static const char* kMockProcessesJson = R"([ + {"Name":"logioptionsplus_agent","CPU_Sec":36014,"MemMB":129,"Id":1000}, + {"Name":"dllhost","CPU_Sec":33124,"MemMB":11.8,"Id":2000}, + {"Name":"svchost","CPU_Sec":28000,"MemMB":45.2,"Id":3000}, + {"Name":"explorer","CPU_Sec":15000,"MemMB":120.5,"Id":4000}, + {"Name":"chrome","CPU_Sec":12000,"MemMB":350.0,"Id":5000}, + {"Name":"code","CPU_Sec":9500,"MemMB":280.3,"Id":6000}, + {"Name":"WindowsTerminal","CPU_Sec":7000,"MemMB":90.1,"Id":7000}, + {"Name":"RuntimeBroker","CPU_Sec":5500,"MemMB":22.4,"Id":8000}, + 
{"Name":"SearchHost","CPU_Sec":4000,"MemMB":65.7,"Id":9000}, + {"Name":"SystemSettings","CPU_Sec":3200,"MemMB":18.9,"Id":10000} +])"; + +static const char* kMockNetworkConfigJson = R"([ + {"InterfaceAlias":"Ethernet 2","IPv4":"10.0.0.100","Gateway":"10.0.0.1","DNS":"10.0.0.1"}, + {"InterfaceAlias":"Wi-Fi","IPv4":"169.254.154.153","Gateway":null,"DNS":""} +])"; + +static const char* kMockStartupJson = R"([ + {"Name":"AMDNoiseSuppression","Command":"\"C:\\windows\\system32\\AMD\\ANR\\AMDNoiseSuppression.exe\"","Location":"HKU\\S-1-5-21-000000000-000000000-000000000-1001\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run"}, + {"Name":"lemonade-server","Command":"lemonade-server.lnk","Location":"Startup"}, + {"Name":"Discord","Command":"\"C:\\Users\\testuser\\AppData\\Local\\Discord\\Update.exe\" --processStart Discord.exe","Location":"HKU\\S-1-5-21-000000000-000000000-000000000-1001\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run"} +])"; + +static const char* kMockSystemErrorsJson = R"([ + {"TimeCreated":"/Date(1772491907481)/","Id":10010,"Message":"The server {00000000-1111-2222-3333-444444444444} did not register with DCOM within the required timeout."} +])"; + +static const char* kMockWindowsUpdatesJson = R"([ + {"HotFixID":"KB5077181","Description":"Security Update","InstalledOn":{"value":"/Date(1770796800000)/","DateTime":"Wednesday, February 11, 2026 12:00:00 AM"}} +])"; + +static const char* kMockInstalledSoftwareJson = R"([ + {"DisplayName":"AMD Settings","DisplayVersion":"2026.0217.0826.2089","Publisher":"Advanced Micro Devices, Inc.","InstallDate":"20260227"}, + {"DisplayName":"Lemonade Server","DisplayVersion":"9.3.0","Publisher":"AMD","InstallDate":"20260206"} +])"; + +// =================================================================== +// isSafeShellArg — replicated here because the original is file-static +// in wifi_agent.cpp and cannot be imported. 
+// =================================================================== +static bool isSafeShellArg(const std::string& arg) { + for (char c : arg) { + if (c == ';' || c == '|' || c == '&' || c == '`' || c == '$' + || c == '(' || c == ')' || c == '{' || c == '}' || c == '<' + || c == '>' || c == '"' || c == '\n' || c == '\r') { + return false; + } + } + return !arg.empty(); +} + +// =================================================================== +// MockWiFiAgent — registers the same tool names/schemas as the real +// WiFiTroubleshooterAgent, but callbacks return hardcoded mock data. +// =================================================================== +class MockWiFiAgent : public Agent { +public: + explicit MockWiFiAgent(const AgentConfig& config = {}) : Agent(config) { + init(); + } + +protected: + void registerTools() override { + // ---- check_adapter ---- + toolRegistry().registerTool( + "check_adapter", + "Show Wi-Fi adapter status.", + [](const json& /*args*/) -> json { + return {{"tool", "check_adapter"}, + {"command", "netsh wlan show interfaces"}, + {"output", kMockAdapterOutput}}; + }, + {} + ); + + // ---- check_wifi_drivers ---- + toolRegistry().registerTool( + "check_wifi_drivers", + "Show Wi-Fi driver information.", + [](const json& /*args*/) -> json { + return {{"tool", "check_wifi_drivers"}, + {"command", "netsh wlan show drivers"}, + {"output", kMockDriverOutput}}; + }, + {} + ); + + // ---- check_ip_config ---- + toolRegistry().registerTool( + "check_ip_config", + "Show full IP configuration.", + [](const json& /*args*/) -> json { + return {{"tool", "check_ip_config"}, + {"command", "ipconfig /all"}, + {"output", kMockIpConfigOutput}}; + }, + {} + ); + + // ---- test_dns_resolution ---- + toolRegistry().registerTool( + "test_dns_resolution", + "Test DNS resolution.", + [](const json& args) -> json { + std::string hostname = args.value("hostname", "google.com"); + if (!isSafeShellArg(hostname)) { + return {{"error", "Invalid hostname -- 
contains disallowed characters"}}; + } + std::string cmd = "Resolve-DnsName -Name " + hostname + + " -Type A | ConvertTo-Json"; + return {{"tool", "test_dns_resolution"}, + {"command", cmd}, + {"hostname", hostname}, + {"output", kMockDnsOutput}}; + }, + {makeParam("hostname", ToolParamType::STRING, false, + "The hostname to resolve (default: google.com)")} + ); + + // ---- test_internet ---- + toolRegistry().registerTool( + "test_internet", + "Test internet connectivity.", + [](const json& /*args*/) -> json { + return {{"tool", "test_internet"}, + {"command", "Test-NetConnection -ComputerName 8.8.8.8 -Port 443 | ConvertTo-Json"}, + {"output", kMockInternetOutput}}; + }, + {} + ); + + // ---- ping_host ---- + toolRegistry().registerTool( + "ping_host", + "Ping a specific host.", + [](const json& args) -> json { + std::string host = args.value("host", ""); + if (host.empty()) { + return {{"error", "host parameter is required"}}; + } + if (!isSafeShellArg(host)) { + return {{"error", "Invalid host -- contains disallowed characters"}}; + } + std::string cmd = "Test-NetConnection -ComputerName " + host + + " | ConvertTo-Json"; + return {{"tool", "ping_host"}, + {"command", cmd}, + {"host", host}, + {"output", kMockPingOutput}}; + }, + {makeParam("host", ToolParamType::STRING, true, + "The hostname or IP address to ping")} + ); + + // ---- flush_dns_cache ---- + toolRegistry().registerTool( + "flush_dns_cache", + "Clear the local DNS resolver cache.", + [](const json& /*args*/) -> json { + return {{"tool", "flush_dns_cache"}, + {"command", "Clear-DnsClientCache"}, + {"status", "completed"}, + {"output", "(no output)"}}; + }, + {} + ); + + // ---- set_dns_servers ---- + toolRegistry().registerTool( + "set_dns_servers", + "Set custom DNS server addresses.", + [](const json& args) -> json { + std::string adapter = args.value("adapter_name", ""); + std::string primary = args.value("primary_dns", ""); + std::string secondary = args.value("secondary_dns", ""); + + if 
(adapter.empty() || primary.empty()) { + return {{"error", "adapter_name and primary_dns are required"}}; + } + if (!isSafeShellArg(adapter) || !isSafeShellArg(primary) || + (!secondary.empty() && !isSafeShellArg(secondary))) { + return {{"error", "Invalid parameter -- contains disallowed characters"}}; + } + + std::string cmd = "Set-DnsClientServerAddress -InterfaceAlias '" + + adapter + "' -ServerAddresses "; + if (secondary.empty()) { + cmd += "'" + primary + "'"; + } else { + cmd += "('" + primary + "','" + secondary + "')"; + } + + return { + {"tool", "set_dns_servers"}, + {"command", cmd}, + {"adapter_name", adapter}, + {"primary_dns", primary}, + {"secondary_dns", secondary}, + {"status", "completed"}, + {"output", "(no output)"} + }; + }, + { + makeParam("adapter_name", ToolParamType::STRING, true, "Adapter name"), + makeParam("primary_dns", ToolParamType::STRING, true, "Primary DNS"), + makeParam("secondary_dns", ToolParamType::STRING, false, "Secondary DNS") + } + ); + + // ---- renew_dhcp_lease ---- + toolRegistry().registerTool( + "renew_dhcp_lease", + "Release and renew the DHCP lease.", + [](const json& /*args*/) -> json { + return {{"tool", "renew_dhcp_lease"}, + {"command", "ipconfig /release; Start-Sleep -Seconds 1; ipconfig /renew"}, + {"status", "completed"}, + {"output", "DHCP lease renewed"}}; + }, + {} + ); + + // ---- restart_wifi_adapter ---- + toolRegistry().registerTool( + "restart_wifi_adapter", + "Disable and re-enable a network adapter.", + [](const json& args) -> json { + std::string adapter = args.value("adapter_name", ""); + if (adapter.empty()) { + return {{"error", "adapter_name is required"}}; + } + if (!isSafeShellArg(adapter)) { + return {{"error", "Invalid adapter_name -- contains disallowed characters"}}; + } + return { + {"tool", "restart_wifi_adapter"}, + {"command", "Disable-NetAdapter ... 
Enable-NetAdapter"}, + {"adapter_name", adapter}, + {"status", "completed"}, + {"output", "(no output)"} + }; + }, + {makeParam("adapter_name", ToolParamType::STRING, true, "Adapter name")} + ); + + // ---- enable_wifi_adapter ---- + toolRegistry().registerTool( + "enable_wifi_adapter", + "Enable a disabled Wi-Fi adapter.", + [](const json& args) -> json { + std::string adapter = args.value("adapter_name", ""); + if (adapter.empty()) { + return {{"error", "adapter_name is required"}}; + } + if (!isSafeShellArg(adapter)) { + return {{"error", "Invalid adapter_name -- contains disallowed characters"}}; + } + return { + {"tool", "enable_wifi_adapter"}, + {"command", "Enable-NetAdapter -Name '" + adapter + "'"}, + {"adapter_name", adapter}, + {"status", "completed"}, + {"output", "(no output)"} + }; + }, + {makeParam("adapter_name", ToolParamType::STRING, true, "Adapter name")} + ); + + // ---- toggle_wifi_radio ---- + toolRegistry().registerTool( + "toggle_wifi_radio", + "Turn the Wi-Fi radio ON or OFF.", + [](const json& args) -> json { + std::string state = args.value("state", "on"); + std::string radioState = (state == "off") ? "Off" : "On"; + return { + {"tool", "toggle_wifi_radio"}, + {"command", "Windows Radio API: Set Wi-Fi radio to " + radioState}, + {"requested_state", radioState}, + {"status", "completed"}, + {"output", "Wi-Fi radio set to " + radioState} + }; + }, + {makeParam("state", ToolParamType::STRING, false, + "The desired radio state: 'on' or 'off' (default: 'on')")} + ); + } + + std::string getSystemPrompt() const override { + return "You are a mock WiFi troubleshooter agent for testing."; + } + +public: + ToolRegistry& tools() { return toolRegistry(); } +}; + +// ################################################################### +// +// 1. 
WiFi Agent Tool Integration
+//
+// ###################################################################
+
+class WiFiToolsTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        AgentConfig config;
+        config.silentMode = true;
+        agent_ = std::make_unique<MockWiFiAgent>(config);
+    }
+    std::unique_ptr<MockWiFiAgent> agent_;
+};
+
+TEST_F(WiFiToolsTest, CheckAdapterReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool("check_adapter", json::object());
+    EXPECT_EQ(result["tool"], "check_adapter");
+    EXPECT_EQ(result["command"], "netsh wlan show interfaces");
+    std::string output = result["output"].get<std::string>();
+    EXPECT_TRUE(output.find("Wi-Fi") != std::string::npos);
+    EXPECT_TRUE(output.find("RZ717") != std::string::npos);
+    EXPECT_TRUE(output.find("AA:BB:CC:DD:EE:01") != std::string::npos);
+    EXPECT_TRUE(output.find("disconnected") != std::string::npos);
+    EXPECT_TRUE(output.find("{00000000-1111-2222-3333-444444444444}") != std::string::npos);
+}
+
+TEST_F(WiFiToolsTest, CheckWiFiDriversReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool("check_wifi_drivers", json::object());
+    EXPECT_EQ(result["tool"], "check_wifi_drivers");
+    EXPECT_EQ(result["command"], "netsh wlan show drivers");
+    std::string output = result["output"].get<std::string>();
+    EXPECT_TRUE(output.find("MediaTek") != std::string::npos);
+    EXPECT_TRUE(output.find("5.5.0.3548") != std::string::npos);
+    EXPECT_TRUE(output.find("802.11be") != std::string::npos);
+}
+
+TEST_F(WiFiToolsTest, CheckIpConfigReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool("check_ip_config", json::object());
+    EXPECT_EQ(result["tool"], "check_ip_config");
+    EXPECT_EQ(result["command"], "ipconfig /all");
+    std::string output = result["output"].get<std::string>();
+    EXPECT_TRUE(output.find("TESTPC-001") != std::string::npos);
+    EXPECT_TRUE(output.find("10.0.0.100") != std::string::npos);
+    EXPECT_TRUE(output.find("10.0.0.1") != std::string::npos);
+    EXPECT_TRUE(output.find("AA-BB-CC-DD-EE-02") != std::string::npos);
+}
+
+TEST_F(WiFiToolsTest, TestDnsResolutionDefaultHostname) {
+    json result = agent_->tools().executeTool("test_dns_resolution", json::object());
+    EXPECT_EQ(result["tool"], "test_dns_resolution");
+    EXPECT_EQ(result["hostname"], "google.com");
+    // The command should reference google.com as default
+    std::string cmd = result["command"].get<std::string>();
+    EXPECT_TRUE(cmd.find("google.com") != std::string::npos);
+    // Output should be parseable JSON
+    json parsedOutput = json::parse(result["output"].get<std::string>());
+    EXPECT_EQ(parsedOutput["Name"], "google.com");
+    EXPECT_EQ(parsedOutput["IPAddress"], "142.251.33.206");
+    EXPECT_EQ(parsedOutput["QueryType"], 1);
+}
+
+TEST_F(WiFiToolsTest, TestDnsResolutionCustomHostname) {
+    json result = agent_->tools().executeTool(
+        "test_dns_resolution", {{"hostname", "cloudflare.com"}});
+    EXPECT_EQ(result["hostname"], "cloudflare.com");
+    std::string cmd = result["command"].get<std::string>();
+    EXPECT_TRUE(cmd.find("cloudflare.com") != std::string::npos);
+    // No error should be present
+    EXPECT_FALSE(result.contains("error"));
+}
+
+TEST_F(WiFiToolsTest, TestInternetReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool("test_internet", json::object());
+    EXPECT_EQ(result["tool"], "test_internet");
+    // Output should be parseable JSON
+    json parsedOutput = json::parse(result["output"].get<std::string>());
+    EXPECT_EQ(parsedOutput["ComputerName"], "8.8.8.8");
+    EXPECT_EQ(parsedOutput["RemotePort"], 443);
+    EXPECT_EQ(parsedOutput["TcpTestSucceeded"], true);
+    EXPECT_EQ(parsedOutput["PingSucceeded"], false);
+    EXPECT_TRUE(parsedOutput["PingReplyDetails"].is_null());
+}
+
+TEST_F(WiFiToolsTest, PingHostReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool(
+        "ping_host", {{"host", "10.0.0.1"}});
+    EXPECT_EQ(result["tool"], "ping_host");
+    EXPECT_EQ(result["host"], "10.0.0.1");
+    std::string cmd = result["command"].get<std::string>();
+    EXPECT_TRUE(cmd.find("10.0.0.1") != std::string::npos);
+    // Output should be parseable JSON
+    json parsedOutput = 
json::parse(result["output"].get<std::string>());
+    EXPECT_EQ(parsedOutput["PingSucceeded"], true);
+}
+
+TEST_F(WiFiToolsTest, PingHostMissingArgReturnsError) {
+    // Empty host
+    json result = agent_->tools().executeTool("ping_host", json::object());
+    EXPECT_TRUE(result.contains("error"));
+    EXPECT_EQ(result["error"], "host parameter is required");
+    EXPECT_FALSE(result.contains("tool"));
+}
+
+TEST_F(WiFiToolsTest, FlushDnsCacheReturnsStatus) {
+    json result = agent_->tools().executeTool("flush_dns_cache", json::object());
+    EXPECT_EQ(result["tool"], "flush_dns_cache");
+    EXPECT_EQ(result["status"], "completed");
+    EXPECT_EQ(result["command"], "Clear-DnsClientCache");
+}
+
+TEST_F(WiFiToolsTest, SetDnsServersMissingArgsReturnsError) {
+    // No arguments at all
+    json result = agent_->tools().executeTool("set_dns_servers", json::object());
+    EXPECT_TRUE(result.contains("error"));
+    EXPECT_EQ(result["error"], "adapter_name and primary_dns are required");
+
+    // Only adapter, no primary_dns
+    result = agent_->tools().executeTool(
+        "set_dns_servers", {{"adapter_name", "Wi-Fi"}});
+    EXPECT_TRUE(result.contains("error"));
+    EXPECT_EQ(result["error"], "adapter_name and primary_dns are required");
+}
+
+TEST_F(WiFiToolsTest, SetDnsServersReturnsExpectedFormat) {
+    json result = agent_->tools().executeTool("set_dns_servers", {
+        {"adapter_name", "Wi-Fi"},
+        {"primary_dns", "8.8.8.8"},
+        {"secondary_dns", "8.8.4.4"}
+    });
+    EXPECT_EQ(result["tool"], "set_dns_servers");
+    EXPECT_EQ(result["status"], "completed");
+    EXPECT_EQ(result["adapter_name"], "Wi-Fi");
+    EXPECT_EQ(result["primary_dns"], "8.8.8.8");
+    EXPECT_EQ(result["secondary_dns"], "8.8.4.4");
+    // Command should include both DNS servers
+    std::string cmd = result["command"].get<std::string>();
+    EXPECT_TRUE(cmd.find("8.8.8.8") != std::string::npos);
+    EXPECT_TRUE(cmd.find("8.8.4.4") != std::string::npos);
+}
+
+TEST_F(WiFiToolsTest, RenewDhcpLeaseReturnsStatus) {
+    json result = agent_->tools().executeTool("renew_dhcp_lease", 
json::object());
+    EXPECT_EQ(result["tool"], "renew_dhcp_lease");
+    EXPECT_EQ(result["status"], "completed");
+    std::string cmd = result["command"].get<std::string>();
+    EXPECT_TRUE(cmd.find("ipconfig") != std::string::npos);
+}
+
+TEST_F(WiFiToolsTest, RestartWiFiAdapterMissingArgReturnsError) {
+    json result = agent_->tools().executeTool("restart_wifi_adapter", json::object());
+    EXPECT_TRUE(result.contains("error"));
+    EXPECT_EQ(result["error"], "adapter_name is required");
+}
+
+TEST_F(WiFiToolsTest, EnableWiFiAdapterMissingArgReturnsError) {
+    json result = agent_->tools().executeTool("enable_wifi_adapter", json::object());
+    EXPECT_TRUE(result.contains("error"));
+    EXPECT_EQ(result["error"], "adapter_name is required");
+}
+
+TEST_F(WiFiToolsTest, ToggleWiFiRadioDefaultsToOn) {
+    // Default state should be "on"
+    json result = agent_->tools().executeTool("toggle_wifi_radio", json::object());
+    EXPECT_EQ(result["tool"], "toggle_wifi_radio");
+    EXPECT_EQ(result["requested_state"], "On");
+    EXPECT_EQ(result["status"], "completed");
+    std::string output = result["output"].get<std::string>();
+    EXPECT_TRUE(output.find("On") != std::string::npos);
+
+    // Explicit "off"
+    result = agent_->tools().executeTool("toggle_wifi_radio", {{"state", "off"}});
+    EXPECT_EQ(result["requested_state"], "Off");
+}
+
+// ###################################################################
+//
+// 2. 
WiFi Agent Input Validation
+//
+// ###################################################################
+
+TEST(WiFiInputValidation, SafeHostnameAccepted) {
+    EXPECT_TRUE(isSafeShellArg("google.com"));
+    EXPECT_TRUE(isSafeShellArg("cloudflare.com"));
+    EXPECT_TRUE(isSafeShellArg("192.168.1.1"));
+    EXPECT_TRUE(isSafeShellArg("my-host.example.org"));
+    EXPECT_TRUE(isSafeShellArg("10.0.0.1"));
+    EXPECT_TRUE(isSafeShellArg("localhost"));
+}
+
+TEST(WiFiInputValidation, UnsafeHostnameRejected) {
+    EXPECT_FALSE(isSafeShellArg("host;rm -rf /"));
+    EXPECT_FALSE(isSafeShellArg("host|cat /etc/passwd"));
+    EXPECT_FALSE(isSafeShellArg("host&whoami"));
+    EXPECT_FALSE(isSafeShellArg("host`id`"));
+    EXPECT_FALSE(isSafeShellArg("host$PATH"));
+    EXPECT_FALSE(isSafeShellArg("host(cmd)"));
+    EXPECT_FALSE(isSafeShellArg("host{cmd}"));
+    EXPECT_FALSE(isSafeShellArg("host<file"));
+    EXPECT_FALSE(isSafeShellArg("host>file"));
+    EXPECT_FALSE(isSafeShellArg("host\"quoted"));
+    EXPECT_FALSE(isSafeShellArg("host\ninjected"));
+    EXPECT_FALSE(isSafeShellArg("host\rinjected"));
+}
+
+TEST(WiFiInputValidation, EmptyHostnameRejected) {
+    EXPECT_FALSE(isSafeShellArg(""));
+}
+
+// ###################################################################
+//
+// 3. 
Health Agent PowerShell Output Parsing
+//
+// ###################################################################
+
+TEST(HealthOutputParsing, MemoryJsonParsesCorrectly) {
+    json mem = json::parse(kMockMemoryJson);
+    EXPECT_TRUE(mem.contains("TotalGB"));
+    EXPECT_TRUE(mem.contains("FreeGB"));
+    EXPECT_DOUBLE_EQ(mem["TotalGB"].get<double>(), 63.65);
+    EXPECT_DOUBLE_EQ(mem["FreeGB"].get<double>(), 22.56);
+}
+
+TEST(HealthOutputParsing, DiskJsonParsesCorrectly) {
+    json disk = json::parse(kMockDiskJson);
+    EXPECT_EQ(disk["Name"], "C");
+    EXPECT_DOUBLE_EQ(disk["UsedGB"].get<double>(), 1312.44);
+    EXPECT_DOUBLE_EQ(disk["FreeGB"].get<double>(), 103.34);
+}
+
+TEST(HealthOutputParsing, CpuJsonParsesCorrectly) {
+    json cpu = json::parse(kMockCpuJson);
+    EXPECT_EQ(cpu["Name"], "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S");
+    EXPECT_EQ(cpu["LoadPercentage"], 6);
+    EXPECT_EQ(cpu["NumberOfCores"], 16);
+}
+
+TEST(HealthOutputParsing, GpuJsonParsesCorrectly) {
+    json gpu = json::parse(kMockGpuJson);
+    ASSERT_TRUE(gpu.is_array());
+    ASSERT_EQ(gpu.size(), 2u);
+
+    // First GPU — virtual display with null fields
+    EXPECT_EQ(gpu[0]["Name"], "USB Mobile Monitor Virtual Display");
+    EXPECT_TRUE(gpu[0]["AdapterRAM"].is_null());
+    EXPECT_EQ(gpu[0]["DriverVersion"], "2.0.0.1");
+    EXPECT_TRUE(gpu[0]["VideoProcessor"].is_null());
+
+    // Second GPU — AMD Radeon with real values
+    EXPECT_EQ(gpu[1]["Name"], "AMD Radeon(TM) 8060S Graphics");
+    EXPECT_EQ(gpu[1]["AdapterRAM"].get<int64_t>(), 4293918720);
+    EXPECT_EQ(gpu[1]["DriverVersion"], "32.0.23027.2005");
+    EXPECT_EQ(gpu[1]["VideoProcessor"], "AMD Radeon Graphics Processor (0x1586)");
+}
+
+TEST(HealthOutputParsing, ProcessesJsonParsesCorrectly) {
+    json procs = json::parse(kMockProcessesJson);
+    ASSERT_TRUE(procs.is_array());
+    ASSERT_EQ(procs.size(), 10u);
+
+    // Verify first process
+    EXPECT_EQ(procs[0]["Name"], "logioptionsplus_agent");
+    EXPECT_EQ(procs[0]["CPU_Sec"], 36014);
+    EXPECT_EQ(procs[0]["MemMB"], 129);
+    EXPECT_EQ(procs[0]["Id"], 1000);
+
+    // Verify last process
+    
EXPECT_EQ(procs[9]["Name"], "SystemSettings");
+    EXPECT_EQ(procs[9]["Id"], 10000);
+
+    // Verify all have required fields
+    for (const auto& proc : procs) {
+        EXPECT_TRUE(proc.contains("Name"));
+        EXPECT_TRUE(proc.contains("CPU_Sec"));
+        EXPECT_TRUE(proc.contains("MemMB"));
+        EXPECT_TRUE(proc.contains("Id"));
+    }
+}
+
+TEST(HealthOutputParsing, NetworkConfigJsonParsesCorrectly) {
+    json net = json::parse(kMockNetworkConfigJson);
+    ASSERT_TRUE(net.is_array());
+    ASSERT_EQ(net.size(), 2u);
+
+    // First interface — Ethernet with all fields populated
+    EXPECT_EQ(net[0]["InterfaceAlias"], "Ethernet 2");
+    EXPECT_EQ(net[0]["IPv4"], "10.0.0.100");
+    EXPECT_EQ(net[0]["Gateway"], "10.0.0.1");
+    EXPECT_EQ(net[0]["DNS"], "10.0.0.1");
+
+    // Second interface — Wi-Fi with null gateway and empty DNS
+    EXPECT_EQ(net[1]["InterfaceAlias"], "Wi-Fi");
+    EXPECT_EQ(net[1]["IPv4"], "169.254.154.153");
+    EXPECT_TRUE(net[1]["Gateway"].is_null());
+    EXPECT_EQ(net[1]["DNS"], "");
+}
+
+TEST(HealthOutputParsing, StartupProgramsJsonParsesCorrectly) {
+    json startup = json::parse(kMockStartupJson);
+    ASSERT_TRUE(startup.is_array());
+    ASSERT_EQ(startup.size(), 3u);
+
+    EXPECT_EQ(startup[0]["Name"], "AMDNoiseSuppression");
+    EXPECT_TRUE(startup[0]["Command"].get<std::string>().find("AMDNoiseSuppression.exe") != std::string::npos);
+    EXPECT_TRUE(startup[0]["Location"].get<std::string>().find("S-1-5-21-000000000") != std::string::npos);
+
+    EXPECT_EQ(startup[1]["Name"], "lemonade-server");
+    EXPECT_EQ(startup[1]["Location"], "Startup");
+
+    EXPECT_EQ(startup[2]["Name"], "Discord");
+    EXPECT_TRUE(startup[2]["Command"].get<std::string>().find("testuser") != std::string::npos);
+}
+
+TEST(HealthOutputParsing, SystemErrorsJsonParsesCorrectly) {
+    json errors = json::parse(kMockSystemErrorsJson);
+    ASSERT_TRUE(errors.is_array());
+    ASSERT_EQ(errors.size(), 1u);
+
+    // Verify Date format string is preserved
+    std::string timeCreated = errors[0]["TimeCreated"].get<std::string>();
+    EXPECT_TRUE(timeCreated.find("/Date(") != std::string::npos);
+    
EXPECT_TRUE(timeCreated.find(")/") != std::string::npos);
+
+    EXPECT_EQ(errors[0]["Id"], 10010);
+    EXPECT_TRUE(errors[0]["Message"].get<std::string>().find("DCOM") != std::string::npos);
+    // Verify anonymized GUID in message
+    EXPECT_TRUE(errors[0]["Message"].get<std::string>().find("00000000-1111-2222-3333-444444444444") != std::string::npos);
+}
+
+TEST(HealthOutputParsing, WindowsUpdatesJsonParsesCorrectly) {
+    json updates = json::parse(kMockWindowsUpdatesJson);
+    ASSERT_TRUE(updates.is_array());
+    ASSERT_EQ(updates.size(), 1u);
+
+    EXPECT_EQ(updates[0]["HotFixID"], "KB5077181");
+    EXPECT_EQ(updates[0]["Description"], "Security Update");
+
+    // Verify nested InstalledOn object
+    ASSERT_TRUE(updates[0]["InstalledOn"].is_object());
+    json installedOn = updates[0]["InstalledOn"];
+    EXPECT_TRUE(installedOn.contains("value"));
+    EXPECT_TRUE(installedOn.contains("DateTime"));
+    std::string dateValue = installedOn["value"].get<std::string>();
+    EXPECT_TRUE(dateValue.find("/Date(") != std::string::npos);
+    std::string dateTime = installedOn["DateTime"].get<std::string>();
+    EXPECT_TRUE(dateTime.find("February") != std::string::npos);
+}
+
+TEST(HealthOutputParsing, BatteryEmptyJsonHandledCorrectly) {
+    json battery = json::parse(kMockBatteryJson);
+    EXPECT_TRUE(battery.is_object());
+    EXPECT_TRUE(battery.empty());
+    // Verify graceful handling: no crash, no fields
+    EXPECT_FALSE(battery.contains("Status"));
+    EXPECT_FALSE(battery.contains("ChargePercent"));
+}
+
+TEST(HealthOutputParsing, InstalledSoftwareJsonParsesCorrectly) {
+    json software = json::parse(kMockInstalledSoftwareJson);
+    ASSERT_TRUE(software.is_array());
+    ASSERT_EQ(software.size(), 2u);
+
+    EXPECT_EQ(software[0]["DisplayName"], "AMD Settings");
+    EXPECT_EQ(software[0]["DisplayVersion"], "2026.0217.0826.2089");
+    EXPECT_EQ(software[0]["Publisher"], "Advanced Micro Devices, Inc.");
+    EXPECT_EQ(software[0]["InstallDate"], "20260227");
+
+    EXPECT_EQ(software[1]["DisplayName"], "Lemonade Server");
+    EXPECT_EQ(software[1]["Publisher"], "AMD");
+}
+
+TEST(HealthOutputParsing, StorageHealthJsonParsesCorrectly) {
+    json storage = json::parse(kMockStorageJson);
+    EXPECT_EQ(storage["FriendlyName"], "PHISON ESR02TBYCCA4-EDJ-2");
+    EXPECT_EQ(storage["MediaType"], "SSD");
+    EXPECT_DOUBLE_EQ(storage["SizeGB"].get<double>(), 1907.7);
+    EXPECT_EQ(storage["HealthStatus"], "Healthy");
+    EXPECT_EQ(storage["OperationalStatus"], "OK");
+}
+
+// ###################################################################
+//
+// 4. Tool Result -> CleanConsole Pipeline
+//
+// ###################################################################
+
+TEST(ToolConsoleIntegration, WiFiToolResultRendersInConsole) {
+    CleanConsole console;
+    CoutCapture cap;
+
+    json result = {
+        {"tool", "check_adapter"},
+        {"command", "netsh wlan show interfaces"},
+        {"output", "Name: Wi-Fi\nState: connected"}
+    };
+    console.prettyPrintJson(result, "Tool Result");
+
+    std::string out = cap.str();
+    // Should show the command
+    EXPECT_TRUE(out.find("Cmd:") != std::string::npos)
+        << "Expected Cmd: label; got: " << out;
+    EXPECT_TRUE(out.find("netsh wlan show interfaces") != std::string::npos)
+        << "Expected command text; got: " << out;
+    // Should show the output
+    EXPECT_TRUE(out.find("Output:") != std::string::npos)
+        << "Expected Output: label; got: " << out;
+    EXPECT_TRUE(out.find("Wi-Fi") != std::string::npos)
+        << "Expected output content; got: " << out;
+}
+
+TEST(ToolConsoleIntegration, WiFiToolArgsRendersInConsole) {
+    CleanConsole console;
+    CoutCapture cap;
+
+    json args = {{"hostname", "google.com"}};
+    console.prettyPrintJson(args, "Tool Args");
+
+    std::string out = cap.str();
+    EXPECT_TRUE(out.find("Args:") != std::string::npos)
+        << "Expected Args: label; got: " << out;
+    EXPECT_TRUE(out.find("hostname") != std::string::npos)
+        << "Expected key 'hostname'; got: " << out;
+    EXPECT_TRUE(out.find("google.com") != std::string::npos)
+        << "Expected value 'google.com'; got: " << out;
+}
+
+TEST(ToolConsoleIntegration, ToolResultWithErrorRendersRedLabel) {
+    
CleanConsole console; + CoutCapture cap; + + json result = {{"error", "host parameter is required"}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Error:") != std::string::npos) + << "Expected Error: label; got: " << out; + EXPECT_TRUE(out.find("host parameter is required") != std::string::npos) + << "Expected error message; got: " << out; + // The red ANSI code should appear + EXPECT_TRUE(out.find("\033[91m") != std::string::npos) + << "Expected red ANSI code for error; got: " << out; +} + +TEST(ToolConsoleIntegration, ToolResultWithCommandShowsCmd) { + CleanConsole console; + CoutCapture cap; + + json result = { + {"command", "Clear-DnsClientCache"}, + {"status", "completed"}, + {"output", "(no output)"} + }; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + EXPECT_TRUE(out.find("Cmd:") != std::string::npos) + << "Expected Cmd: label; got: " << out; + EXPECT_TRUE(out.find("Clear-DnsClientCache") != std::string::npos) + << "Expected command text; got: " << out; +} + +TEST(ToolConsoleIntegration, ToolResultWithLongOutputTruncates) { + CleanConsole console; + CoutCapture cap; + + // Build output with 15 lines (exceeds kMaxPreviewLines = 10) + std::string longOutput; + for (int i = 1; i <= 15; ++i) { + longOutput += "Adapter line " + std::to_string(i) + "\n"; + } + + json result = {{"output", longOutput}}; + console.prettyPrintJson(result, "Tool Result"); + + std::string out = cap.str(); + // First 10 lines should appear + EXPECT_TRUE(out.find("Adapter line 1") != std::string::npos) + << "Expected first line; got: " << out; + EXPECT_TRUE(out.find("Adapter line 10") != std::string::npos) + << "Expected 10th line; got: " << out; + // Lines beyond 10 should NOT appear + EXPECT_TRUE(out.find("Adapter line 11") == std::string::npos) + << "Line 11 should NOT appear; got: " << out; + // Truncation message + EXPECT_TRUE(out.find("5 more lines") != std::string::npos) + << 
"Expected '5 more lines' truncation message; got: " << out;
+}
+
+TEST(ToolConsoleIntegration, HealthMcpResultRendersInConsole) {
+    CleanConsole console;
+    CoutCapture cap;
+
+    // Simulate an MCP tool result for health agent -- the output field
+    // contains the raw JSON string from PowerShell.
+    json result = {
+        {"output", kMockMemoryJson}
+    };
+    console.prettyPrintJson(result, "Tool Result");
+
+    std::string out = cap.str();
+    EXPECT_TRUE(out.find("Output:") != std::string::npos)
+        << "Expected Output: label; got: " << out;
+    // The raw JSON content should be visible
+    EXPECT_TRUE(out.find("TotalGB") != std::string::npos)
+        << "Expected TotalGB in output; got: " << out;
+    EXPECT_TRUE(out.find("63.65") != std::string::npos)
+        << "Expected memory value in output; got: " << out;
+}
+
+// ###################################################################
+//
+// 5. Mock WiFi Agent Full Chain
+//
+// ###################################################################
+
+class WiFiFullChain : public ::testing::Test {
+protected:
+    void SetUp() override {
+        AgentConfig config;
+        config.silentMode = true;
+        agent_ = std::make_unique<MockWiFiAgent>(config);
+    }
+    std::unique_ptr<MockWiFiAgent> agent_;
+};
+
+TEST_F(WiFiFullChain, DiagnosticToolChainExecutes) {
+    // Execute the diagnostic tools in sequence and verify each returns valid data
+    json r1 = agent_->tools().executeTool("check_adapter", json::object());
+    EXPECT_EQ(r1["tool"], "check_adapter");
+    EXPECT_FALSE(r1.contains("error"));
+
+    json r2 = agent_->tools().executeTool("check_ip_config", json::object());
+    EXPECT_EQ(r2["tool"], "check_ip_config");
+    EXPECT_FALSE(r2.contains("error"));
+
+    json r3 = agent_->tools().executeTool("test_dns_resolution", json::object());
+    EXPECT_EQ(r3["tool"], "test_dns_resolution");
+    EXPECT_FALSE(r3.contains("error"));
+
+    json r4 = agent_->tools().executeTool("test_internet", json::object());
+    EXPECT_EQ(r4["tool"], "test_internet");
+    EXPECT_FALSE(r4.contains("error"));
+
+    // Verify the outputs can all 
be inspected as strings + EXPECT_TRUE(r1["output"].is_string()); + EXPECT_TRUE(r2["output"].is_string()); + EXPECT_TRUE(r3["output"].is_string()); + EXPECT_TRUE(r4["output"].is_string()); + + // Verify that the DNS and Internet outputs are parseable JSON + EXPECT_NO_THROW({ auto parsed = json::parse(r3["output"].get()); (void)parsed; }); + EXPECT_NO_THROW({ auto parsed = json::parse(r4["output"].get()); (void)parsed; }); +} + +TEST_F(WiFiFullChain, FixToolChainExecutes) { + // Execute flush_dns_cache and verify the status is completed + json r1 = agent_->tools().executeTool("flush_dns_cache", json::object()); + EXPECT_EQ(r1["tool"], "flush_dns_cache"); + EXPECT_EQ(r1["status"], "completed"); + EXPECT_FALSE(r1.contains("error")); + + // After flushing DNS, verify dns resolution tool still works + json r2 = agent_->tools().executeTool("test_dns_resolution", json::object()); + EXPECT_EQ(r2["tool"], "test_dns_resolution"); + EXPECT_FALSE(r2.contains("error")); +} diff --git a/docs/docs.json b/docs/docs.json index 65fd7d02f..d312e71df 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -56,6 +56,7 @@ "group": "C++ Framework", "pages": [ "guides/cpp", + "guides/cpp/quickstart", "guides/cpp/overview", "guides/cpp/integration", "guides/cpp/custom-agent", diff --git a/docs/guides/cpp.mdx b/docs/guides/cpp.mdx index 7339de29f..896b4aa1f 100644 --- a/docs/guides/cpp.mdx +++ b/docs/guides/cpp.mdx @@ -7,7 +7,7 @@ description: "Build and run GAIA agents natively in C++17 — same agent loop an - **Native performance** — compiled binary, no interpreter overhead, no Python runtime - **Easy integration** — add to any CMake project via FetchContent, git submodule, or find_package ([Integration Guide](/guides/cpp/integration)) -- **Any LLM backend** — works with Lemonade, llama.cpp, Ollama, vLLM, OpenAI, or any OpenAI-compatible server ([details](/guides/cpp/integration#using-alternative-llm-backends)) +- **Any LLM backend** — works with [Lemonade](https://lemonade-server.ai), 
llama.cpp, Ollama, vLLM, OpenAI, or any OpenAI-compatible server ([details](/guides/cpp/integration#using-alternative-llm-backends)) - **Full MCP support** — connects to any MCP server via stdio transport - **Same agent loop** — planning, tool execution, error recovery, multi-step plans @@ -19,302 +19,17 @@ description: "Build and run GAIA agents natively in C++17 — same agent loop an The C++ framework targets the base agent only. Specialized agents (Code, Docker, Jira, etc.), the REST API server, RAG, and audio are Python-only. See the [Python quickstart](/quickstart) for the full feature set. -## Use in Your Project +## Explore -Add `gaia_core` to any C++ project with three lines of CMake: - -```cmake -FetchContent_Declare(gaia GIT_REPOSITORY https://github.com/amd/gaia.git GIT_TAG main SOURCE_SUBDIR cpp) -FetchContent_MakeAvailable(gaia) -target_link_libraries(my_app PRIVATE gaia::gaia_core) -``` - -All dependencies are resolved automatically. See the [Integration Guide](/guides/cpp/integration) for git submodule, find_package, and shared library methods. - -## Quick Start - - - - Install [Visual Studio 2022](https://visualstudio.microsoft.com/) (Desktop C++ workload) and [CMake 3.14+](https://cmake.org/download/). Git must be on your PATH (required by CMake FetchContent). - - Also install `uv` for the Windows MCP server: - ```bash - pip install uv - ``` - - - - From the repository root: - - - - ```bat - cd cpp - cmake -B build -G "Visual Studio 17 2022" -A x64 - cmake --build build --config Release - ``` - Binaries land in `cpp\build\Release\`. - - - ```bat - cd cpp - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release - cmake --build build - ``` - - - ```bash - cd cpp - cmake -B build -DCMAKE_BUILD_TYPE=Release - cmake --build build - ``` - - - - All dependencies (nlohmann/json, cpp-httplib, Google Test) are fetched automatically — no manual installs. - - - - The agent connects to an OpenAI-compatible LLM at `http://localhost:8000/api/v1` by default. 
- - ```bash - lemonade-server serve - ``` - - See [Setup Guide](/setup) to install Lemonade and download a model. - - - - **Option A: Windows CUA demo** (requires MCP server + `uv`): - ```bat - cpp\build\Release\simple_agent.exe - ``` - The agent connects to the Windows MCP server, gathers CPU/memory/disk metrics via PowerShell, and pastes a formatted report into Notepad. - - **Option B: Wi-Fi Troubleshooter** (no dependencies, run as admin for fix tools): - ```bat - cpp\build\Release\wifi_agent.exe - ``` - Select GPU or NPU backend, then try "Full network diagnostic" or ask a specific question. The agent reasons about each result and adapts its approach in real-time. - - Type `quit` to exit either demo. - - - -## How It Works - -The `simple_agent` demo is a C++ port of the [Windows System Health Agent](/guides/mcp/windows-system-health). It subclasses `gaia::Agent`, connects to the Windows MCP server on startup, then enters the same planning loop as the Python version. - -```mermaid -%%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'18px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff', 'edgeLabelBackground':'#1a1a1a'}, 'flowchart': {'curve': 'basis'}}}%% -graph TB - User("User query") - Agent("WindowsSystemHealthAgent\n(gaia::Agent subclass)") - LLM("LLM\nhttp://localhost:8000") - MCP("Windows MCP Server\nuvx windows-mcp") - - User --> Agent - Agent -->|"chat/completions"| LLM - LLM -->|"plan + tool calls"| Agent - Agent -->|"JSON-RPC 2.0 stdio"| MCP - - MCP --> Shell1("mcp_windows_Shell\nGet-CimInstance (Memory)") - MCP --> Shell2("mcp_windows_Shell\nGet-PSDrive (Disk)") - MCP --> Shell3("mcp_windows_Shell\nGet-WmiObject (CPU)") - MCP --> Clipboard("mcp_windows_Shell\nSet-Clipboard") - MCP --> Notepad("mcp_windows_Shell\nStart-Process notepad") - MCP --> Paste("mcp_windows_Shortcut\nctrl+v") - - style User fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Agent 
fill:#ED1C24,stroke:#F4484D,stroke-width:3px,color:#fff - style LLM fill:#C8171E,stroke:#ED1C24,stroke-width:2px,color:#fff - style MCP fill:#F4484D,stroke:#ED1C24,stroke-width:2px,color:#fff - style Shell1 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Shell2 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Shell3 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Clipboard fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Notepad fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style Paste fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - - linkStyle default stroke:#ED1C24,stroke-width:2px -``` - -**Execution flow:** -1. User query enters `agent.processQuery()` -2. Agent composes a system prompt (tool list + response format) and calls the LLM -3. LLM returns structured JSON with reasoning and a tool call -4. Agent executes the tool (via MCP or registered callback) and feeds the result back -5. LLM reasons about the result, decides the next action, and the loop continues -6. When the LLM has enough information, it provides a final answer - -## Wi-Fi Troubleshooter Demo - -The `wifi_agent` demonstrates **adaptive reasoning** without MCP — all tools are registered directly in C++ as PowerShell commands. This showcases how an agent differs from a script: it reasons about each result, skips irrelevant steps, applies fixes, and verifies outcomes. 
- -**Key features:** -- **Structured reasoning** — LLM outputs `FINDING:` and `DECISION:` prefixes, displayed with color-coded labels in the TUI -- **Adaptive behavior** — skips downstream checks if adapter is disconnected, adds fix/verify steps when issues are found -- **Real tools** — all diagnostics (`netsh`, `ipconfig`, `Test-NetConnection`) and fixes (`flush DNS`, `toggle Wi-Fi radio`, `restart adapter`) execute real PowerShell commands -- **GPU/NPU selection** — choose between GGUF (GPU) and FLM (NPU) model backends at startup -- **Admin detection** — warns on startup if fix tools won't work without elevation - -## Under the Hood - -### Reactive Agent Loop - -The agent is **not a script**. After every tool execution, the LLM is called again with the full conversation so far — including the tool's output. This lets the model reason about results and change course. - -```mermaid -%%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'16px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff'}, 'flowchart': {'curve': 'basis'}}}%% -graph LR - A["Call LLM"] --> B{"Parse JSON"} - B -->|"has tool"| C["Execute tool"] - C --> D["Feed result back"] - D --> A - B -->|"has answer"| E["Done"] - - style A fill:#ED1C24,stroke:#F4484D,stroke-width:2px,color:#fff - style B fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - style C fill:#C8171E,stroke:#ED1C24,stroke-width:2px,color:#fff - style D fill:#F4484D,stroke:#ED1C24,stroke-width:2px,color:#fff - style E fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - linkStyle default stroke:#ED1C24,stroke-width:2px -``` - -Each loop iteration: **LLM reasons → agent executes → result fed back → LLM reasons again**. The LLM can skip steps, add new ones, or pivot strategy at any point. - -### How Tools Are Implemented - -Tools are C++ lambdas registered with `ToolRegistry`. 
The Wi-Fi agent's tools wrap PowerShell commands via a `runShell()` helper that uses `_popen()` to spawn a PowerShell subprocess: - -```cpp -// Simplified — each tool follows this pattern: -toolRegistry().registerTool( - "check_adapter", // name the LLM sees - "Check Wi-Fi adapter status and signal", // description the LLM reads - [](const gaia::json& args) -> gaia::json { - std::string output = runShell("netsh wlan show interfaces"); - return {{"tool", "check_adapter"}, {"output", output}}; - }, - {} // parameter schema -); -``` - -The agent itself is pure C++. PowerShell is just the shell subprocess that executes system commands (`netsh`, `ipconfig`, `Test-NetConnection`). For complex operations like the WinRT Radio API, the tool writes a temporary `.ps1` script and runs it via `powershell -File`. - -### Structured Reasoning Display - -The system prompt instructs the LLM to prefix its reasoning with `FINDING:` and `DECISION:`. The custom `CleanConsole` output handler parses these and displays them with color-coded labels: - -- **Finding** (green) — what the diagnostic data shows -- **Decision** (yellow) — what the agent will do next and *why* - -This is what distinguishes an agent from a script: the decision points are visible. When the agent skips a step ("adapter is disconnected — IP checks would fail"), applies a fix, or re-runs a diagnostic to verify, you can see the reasoning that drove that choice. - -## Writing Your Own Agent - -Subclass `gaia::Agent`, override `getSystemPrompt()` and optionally `registerTools()`, then call `init()` at the end of your constructor: - -```cpp -#include - -class MyAgent : public gaia::Agent { -public: - MyAgent() : Agent(makeConfig()) { - init(); // registers tools and composes system prompt - } - -protected: - std::string getSystemPrompt() const override { - return "You are a helpful assistant. 
Use tools to answer questions."; - } - - void registerTools() override { - toolRegistry().registerTool( - "get_time", - "Return the current UTC time.", - [](const gaia::json&) -> gaia::json { - return {{"time", "2026-02-24T00:00:00Z"}}; - }, - {} // no parameters - ); - } - -private: - static gaia::AgentConfig makeConfig() { - gaia::AgentConfig cfg; - cfg.baseUrl = "http://localhost:8000/api/v1"; - cfg.modelId = "Qwen3-4B-GGUF"; // or any model on your server - cfg.maxSteps = 20; - return cfg; - } -}; - -int main() { - MyAgent agent; - auto result = agent.processQuery("What time is it?"); - std::cout << result["result"].get() << std::endl; -} -``` - -To connect an MCP server and auto-register its tools: - -```cpp -agent.connectMcpServer("my_server", { - {"command", "uvx"}, - {"args", {"my-mcp-package"}} -}); -// Tools are now available as mcp_my_server_ -``` - -## AgentConfig Reference - -| Field | Default | Description | -|-------|---------|-------------| -| `baseUrl` | `http://localhost:8000/api/v1` | LLM server endpoint (any OpenAI-compatible server) | -| `modelId` | `Qwen3-4B-GGUF` | Model identifier sent to the server | -| `maxSteps` | `20` | Maximum agent loop iterations per query | -| `maxPlanIterations` | `3` | Maximum plan/replan cycles before forcing completion | -| `maxConsecutiveRepeats` | `4` | Consecutive identical responses before loop-detection triggers | -| `maxHistoryMessages` | `40` | Max messages kept between queries (0 = unlimited) | -| `contextSize` | `16384` | LLM context window size in tokens (`n_ctx`) | -| `debug` | `false` | Enable verbose debug logging to stdout | -| `showPrompts` | `false` | Print full system prompts and LLM responses | -| `streaming` | `false` | Enable streaming responses from the LLM | -| `silentMode` | `false` | Suppress all console output (use `SilentConsole`) | - -## Running Tests - -```bat -cd cpp\build -ctest -C Release --output-on-failure -``` - -Or directly: - -```bat -cpp\build\Release\gaia_tests.exe 
--gtest_color=yes -``` - -The test suite covers all six modules: agent loop, tool registry, JSON utilities, MCP client, console output, and types. - -## Comparison with Python GAIA - -| Feature | Python | C++ | -|---------|--------|-----| -| Agent loop (plan → tool → answer) | ✓ | ✓ | -| Tool registration | ✓ | ✓ | -| MCP client (stdio) | ✓ | ✓ | -| JSON parsing with fallbacks | ✓ | ✓ | -| OpenAI-compatible LLM backend | ✓ | ✓ | -| Multiple LLM providers (Claude, OpenAI) | ✓ | planned | -| Specialized agents (Code, Docker, Jira…) | ✓ | not ported | -| REST API server | ✓ | not ported | -| Audio / RAG / Stable Diffusion | ✓ | not ported | + + + Build and run a GAIA C++ agent in minutes — prerequisites, build steps, and demo walkthrough + -## Next Steps + + Architecture, agent execution flow, configuration reference, and project structure + - FetchContent, git submodule, find_package, shared library — plus alternative LLM backends diff --git a/docs/guides/cpp/custom-agent.mdx b/docs/guides/cpp/custom-agent.mdx index a7fc65252..c94792629 100644 --- a/docs/guides/cpp/custom-agent.mdx +++ b/docs/guides/cpp/custom-agent.mdx @@ -441,6 +441,134 @@ std::cout << consolePtr->finalAnswer() << "\n"; --- +## Step 7 — Embedding in Your Application + +When integrating the agent into a desktop application (WPF, Qt, Electron), you need to run it headless, capture its output programmatically, and keep your UI responsive. + +### Headless Pattern + +Use `SilentConsole` to suppress all terminal output. 
The only interface is the JSON return value from `processQuery()`: + +```cpp +#include +#include + +gaia::AgentConfig cfg; +cfg.silentMode = true; // uses SilentConsole automatically + +MyAgent agent; +// No terminal output — agent runs silently +gaia::json result = agent.processQuery("diagnose network issues"); + +std::string answer = result["result"].get(); +int steps = result["steps_taken"].get(); +``` + +### Background Thread Pattern + +Since `processQuery()` blocks, run it on a background thread and post results to your UI: + +```cpp +#include +#include + +// Launch agent on background thread +std::future future = std::async(std::launch::async, [&agent]() { + return agent.processQuery("run full diagnostic"); +}); + +// UI thread can check if done or wait +if (future.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + gaia::json result = future.get(); + // Update UI with result +} +``` + +### Custom OutputHandler for UI Integration + +For real-time progress updates in a GUI, implement a custom `OutputHandler` that forwards events to your UI framework: + +```cpp +class UIOutputHandler : public gaia::OutputHandler { +public: + // Callback types — set these to your UI update functions + std::function onStepStart; + std::function onThought; + std::function onToolCall; + std::function onToolResult; + std::function onAnswer; + std::function onError; + + void printStepHeader(int step, int limit) override { + if (onStepStart) onStepStart(step, limit); + } + void printThought(const std::string& t) override { + if (onThought) onThought(t); + } + void printToolUsage(const std::string& name) override { + if (onToolCall) onToolCall(name); + } + void prettyPrintJson(const gaia::json& data, const std::string&) override { + if (onToolResult) onToolResult(data); + } + void printFinalAnswer(const std::string& answer) override { + if (onAnswer) onAnswer(answer); + } + void printError(const std::string& msg) override { + if (onError) onError(msg); + } + + // 
Minimal no-op implementations for remaining required methods + void printProcessingStart(const std::string&, int, const std::string&) override {} + void printStateInfo(const std::string&) override {} + void printGoal(const std::string&) override {} + void printPlan(const gaia::json&, int) override {} + void printToolComplete() override {} + void printWarning(const std::string&) override {} + void printInfo(const std::string&) override {} + void startProgress(const std::string&) override {} + void stopProgress() override {} + void printCompletion(int, int) override {} +}; +``` + +Wire it up: + +```cpp +auto ui = std::make_unique(); +ui->onStepStart = [](int step, int total) { + // Update progress bar: step / total +}; +ui->onThought = [](const std::string& thought) { + // Display agent reasoning in a text panel +}; +ui->onAnswer = [](const std::string& answer) { + // Show final answer in result area +}; + +agent.setOutputHandler(std::move(ui)); +``` + + +**Thread safety:** The `OutputHandler` methods are called from the thread running `processQuery()`. If your callbacks update a GUI, you must post to the UI thread (e.g., `QMetaObject::invokeMethod` in Qt, `Dispatcher.Invoke` in WPF, `PostMessage` in Win32). + + +### Multiple Agents + +Each `Agent` instance is independent. You can run multiple agents concurrently on separate threads — each with its own tools, MCP connections, and output handler: + +```cpp +// Each agent gets its own config, tools, and output handler +MyDiagnosticAgent diagAgent; +MyReportAgent reportAgent; + +// Safe to run in parallel — no shared state +auto f1 = std::async(std::launch::async, [&] { return diagAgent.processQuery("check health"); }); +auto f2 = std::async(std::launch::async, [&] { return reportAgent.processQuery("generate report"); }); +``` + +--- + ## Complete Working Example Below is a self-contained agent combining all six customization points. 
Copy it as a starting point for your own agent: diff --git a/docs/guides/cpp/integration.mdx b/docs/guides/cpp/integration.mdx index 1d4c261fc..f198e4ccc 100644 --- a/docs/guides/cpp/integration.mdx +++ b/docs/guides/cpp/integration.mdx @@ -132,7 +132,11 @@ That gives you `#include `, the full agent loop, tool registry, MC ```bash cmake -B build -S cpp -DCMAKE_BUILD_TYPE=Release + ``` + ```bash cmake --build build --config Release + ``` + ```bash cmake --install build --prefix /usr/local ``` @@ -169,6 +173,8 @@ That gives you `#include `, the full agent loop, tool registry, MC ```bash cmake -B build -S cpp -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release + ``` + ```bash cmake --build build --config Release ``` @@ -276,7 +282,7 @@ target_link_libraries(time_agent PRIVATE gaia::gaia_core) ## Using Alternative LLM Backends -The GAIA C++ agent framework is **not tied to Lemonade or any specific LLM provider**. It talks to a standard HTTP endpoint — any server that implements the OpenAI chat completions API works out of the box. Switch backends by changing two fields: +The GAIA C++ agent framework is **not tied to [Lemonade](https://lemonade-server.ai) or any specific LLM provider**. It talks to a standard HTTP endpoint — any server that implements the OpenAI chat completions API works out of the box. Switch backends by changing two fields: ```cpp gaia::AgentConfig cfg; @@ -360,7 +366,7 @@ That is the entire API surface. No embeddings endpoint, no streaming (unless `cf - [Lemonade Server](https://github.com/amd/lemonade) is the default backend, optimized for AMD Ryzen AI NPU and GPU acceleration. + [Lemonade Server](https://lemonade-server.ai) is the default backend, optimized for AMD Ryzen AI NPU and GPU acceleration. 
```bash lemonade-server serve diff --git a/docs/guides/cpp/overview.mdx b/docs/guides/cpp/overview.mdx index 8161b6d65..a9fcc28fc 100644 --- a/docs/guides/cpp/overview.mdx +++ b/docs/guides/cpp/overview.mdx @@ -1,11 +1,11 @@ --- title: "C++ Framework Overview" -description: "Native C++17 port of the GAIA base agent -- agent loop, tool registry, MCP client, and cross-platform stdio, no Python runtime required" +description: "Native C++17 port of the GAIA base agent — agent loop, tool registry, MCP client, and cross-platform stdio, no Python runtime required" icon: "code" --- - **Source Code:** [`cpp/`](https://github.com/amd/gaia/tree/main/cpp) -- lives alongside the Python package in the GAIA repository. + **Source Code:** [`cpp/`](https://github.com/amd/gaia/tree/main/cpp) — lives alongside the Python package in the GAIA repository. @@ -39,11 +39,11 @@ Specialized agents (Code, Docker, Jira, Blender), the REST API server, RAG, audi ```mermaid %%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'16px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff', 'edgeLabelBackground':'#1a1a1a'}, 'flowchart': {'curve': 'basis'}}}%% -flowchart LR - A["User Query"] --> B["gaia::Agent\nprocessQuery()"] - B -->|"HTTP POST\nchat/completions"| C["LLM Server\n(Lemonade)"] - C -->|"JSON response\nplan + tool calls"| B - B -->|"JSON-RPC 2.0\nstdio"| D["MCP Server\n(subprocess)"] +flowchart TD + A["User Query"] --> B["gaia::Agent
processQuery()"] + B -->|"HTTP POST
chat/completions"| C["LLM Server
(Lemonade)"] + C -->|"JSON response
plan + tool calls"| B + B -->|"JSON-RPC 2.0
stdio"| D["MCP Server
(subprocess)"] D -->|"tool result"| B B --> E["Final Answer"] @@ -58,136 +58,113 @@ flowchart LR 1. User query enters `agent.processQuery()` 2. Agent composes a system prompt (tool descriptions + response format) and sends it to the LLM via HTTP 3. LLM returns a JSON plan with tool calls -4. Agent executes each tool -- either a locally registered C++ callback or a remote MCP tool via JSON-RPC 2.0 over stdio +4. Agent executes each tool — either a locally registered C++ callback or a remote MCP tool via JSON-RPC 2.0 over stdio 5. Tool results feed back to the LLM for further reasoning 6. Loop repeats until the LLM produces a final answer or the step limit is reached +### Reactive Agent Loop + +The agent is **not a script**. After every tool execution, the LLM is called again with the full conversation so far — including the tool's output. This lets the model reason about results and change course. + +```mermaid +%%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'16px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff'}, 'flowchart': {'curve': 'basis'}}}%% +graph LR + A["Call LLM"] --> B{"Parse JSON"} + B -->|"has tool"| C["Execute tool"] + C --> D["Feed result back"] + D --> A + B -->|"has answer"| E["Done"] + + style A fill:#ED1C24,stroke:#F4484D,stroke-width:2px,color:#fff + style B fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style C fill:#C8171E,stroke:#ED1C24,stroke-width:2px,color:#fff + style D fill:#F4484D,stroke:#ED1C24,stroke-width:2px,color:#fff + style E fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + linkStyle default stroke:#ED1C24,stroke-width:2px +``` + +Each loop iteration: **LLM reasons → agent executes → result fed back → LLM reasons again**. The LLM can skip steps, add new ones, or pivot strategy at any point. + --- -## Quick Start +## How It Works -### Prerequisites +The `health_agent` demo is a C++ port of the [Windows System Health Agent](/guides/mcp/windows-system-health). 
It subclasses `gaia::Agent`, connects to the Windows MCP server on startup, then enters the same planning loop as the Python version. -| Tool | Minimum Version | Notes | -|------|----------------|-------| -| CMake | 3.14+ | `cmake --version` | -| C++ Compiler | C++17 | MSVC 2019+, GCC 9+, or Clang 10+ | -| Git | any | Required by CMake FetchContent | -| Lemonade Server | latest | OpenAI-compatible LLM backend | +```mermaid +%%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'18px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff', 'edgeLabelBackground':'#1a1a1a'}, 'flowchart': {'curve': 'basis'}}}%% +graph TB + User("User query") + Agent("WindowsSystemHealthAgent
gaia::Agent subclass") + LLM("LLM
http://localhost:8000") + MCP("Windows MCP Server
uvx windows-mcp") + + User --> Agent + Agent -->|"chat/completions"| LLM + LLM -->|"plan + tool calls"| Agent + Agent -->|"JSON-RPC 2.0 stdio"| MCP + + MCP --> Shell1("mcp_windows_Shell
Get-CimInstance (Memory)") + MCP --> Shell2("mcp_windows_Shell
Get-PSDrive (Disk)") + MCP --> Shell3("mcp_windows_Shell
Get-CimInstance (CPU)") + MCP --> Shell4("mcp_windows_Shell
Get-CimInstance (GPU)") + MCP --> FileWrite("mcp_windows_Shell
Out-File report.txt") + MCP --> Notepad("mcp_windows_Shell
Start-Process notepad") + + style User fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style Agent fill:#ED1C24,stroke:#F4484D,stroke-width:3px,color:#fff + style LLM fill:#C8171E,stroke:#ED1C24,stroke-width:2px,color:#fff + style MCP fill:#F4484D,stroke:#ED1C24,stroke-width:2px,color:#fff + style Shell1 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style Shell2 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style Shell3 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style Shell4 fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style FileWrite fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff + style Notepad fill:#2d2d2d,stroke:#ED1C24,stroke-width:2px,color:#fff - - **First time here?** Complete the [Setup](/setup) guide first to install Lemonade Server and download a model. - + linkStyle default stroke:#ED1C24,stroke-width:2px +``` -### Build - - - - ```bash - git clone https://github.com/amd/gaia.git - cd gaia - ``` - - - - - - ```bat - cd cpp - cmake -B build -G "Visual Studio 17 2022" -A x64 - cmake --build build --config Release - ``` - Binaries land in `cpp\build\Release\`. - - - ```bat - cd cpp - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release - cmake --build build - ``` - - - ```bash - cd cpp - cmake -B build -DCMAKE_BUILD_TYPE=Release - cmake --build build - ``` - Binaries land in `cpp/build/`. - - - - All dependencies (nlohmann/json, cpp-httplib, Google Test) are fetched automatically by CMake -- no manual installs required. - - - - The agent connects to any OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The recommended backend is [Lemonade Server](https://github.com/amd/lemonade) (optimized for AMD hardware): - - ```bash - lemonade-server serve - ``` - - - **Not using Lemonade?** Any OpenAI-compatible server works — llama.cpp, Ollama, vLLM, or a cloud endpoint. Just set `AgentConfig::baseUrl` and `AgentConfig::modelId`. 
See the [Integration Guide](/guides/cpp/integration#using-alternative-llm-backends) for configuration examples. - - - - - - - ```bat - cpp\build\Release\simple_agent.exe - ``` - - - ```bash - ./cpp/build/simple_agent - ``` - - - - Then try: - ``` - You: Run a full system health analysis. - ``` - - **On Windows:** The agent connects to the Windows MCP server, gathers CPU/memory/disk metrics via PowerShell, and pastes a formatted report into Notepad. - - **On Linux/macOS:** The `simple_agent` demo compiles but requires the Windows MCP server to function. Try the [Wi-Fi Troubleshooter Agent](/guides/cpp/wifi-agent) instead, or write your own agent with platform-appropriate tools. - - Type `quit` to exit. - - - Also try the [Wi-Fi Troubleshooter Agent](/guides/cpp/wifi-agent) -- a pure registered-tool agent that diagnoses and fixes network issues without MCP. - - - +### Wi-Fi Troubleshooter Demo ---- +The `wifi_agent` demonstrates **adaptive reasoning** without MCP — all tools are registered directly in C++ as PowerShell commands. This showcases how an agent differs from a script: it reasons about each result, skips irrelevant steps, applies fixes, and verifies outcomes. -## AgentConfig Reference +**Key features:** +- **Structured reasoning** — LLM outputs `FINDING:` and `DECISION:` prefixes, displayed with color-coded labels in the TUI +- **Adaptive behavior** — skips downstream checks if adapter is disconnected, adds fix/verify steps when issues are found +- **Real tools** — all diagnostics (`netsh`, `ipconfig`, `Test-NetConnection`) and fixes (`flush DNS`, `toggle Wi-Fi radio`, `restart adapter`) execute real PowerShell commands +- **GPU/NPU selection** — choose between GGUF (GPU) and FLM (NPU) model backends at startup +- **Admin detection** — warns on startup if fix tools won't work without elevation -All fields have sensible defaults. Override only what you need: +See the [Wi-Fi Troubleshooter Agent guide](/guides/cpp/wifi-agent) for a full walkthrough. 
+ +### How Tools Are Implemented + +Tools are C++ lambdas registered with `ToolRegistry`. The Wi-Fi agent's tools wrap PowerShell commands via a `runShell()` helper that uses `_popen()` to spawn a PowerShell subprocess: ```cpp -gaia::AgentConfig config; -config.baseUrl = "http://localhost:8000/api/v1"; -config.maxSteps = 30; -config.debug = true; +// Simplified — each tool follows this pattern: +toolRegistry().registerTool( + "check_adapter", // name the LLM sees + "Check Wi-Fi adapter status and signal", // description the LLM reads + [](const gaia::json& args) -> gaia::json { + std::string output = runShell("netsh wlan show interfaces"); + return {{"tool", "check_adapter"}, {"output", output}}; + }, + {} // parameter schema +); ``` -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `baseUrl` | `std::string` | `"http://localhost:8000/api/v1"` | LLM server endpoint (any OpenAI-compatible server) | -| `modelId` | `std::string` | `"Qwen3-4B-GGUF"` | Model identifier sent to the server | -| `maxSteps` | `int` | `20` | Maximum agent loop iterations per query | -| `maxPlanIterations` | `int` | `3` | Maximum plan/replan cycles before forcing completion | -| `maxConsecutiveRepeats` | `int` | `4` | Consecutive identical responses before loop-detection triggers | -| `maxHistoryMessages` | `int` | `40` | Max messages kept between queries (0 = unlimited) | -| `contextSize` | `int` | `16384` | LLM context window size in tokens (`n_ctx`) | -| `debug` | `bool` | `false` | Enable verbose debug logging to stdout | -| `showPrompts` | `bool` | `false` | Print full system prompts and LLM responses | -| `silentMode` | `bool` | `false` | Suppress all console output (use `SilentConsole`) | -| `streaming` | `bool` | `false` | Enable streaming responses from the LLM | +The agent itself is pure C++. PowerShell is just the shell subprocess that executes system commands (`netsh`, `ipconfig`, `Test-NetConnection`). 
For complex operations like the WinRT Radio API, the tool writes a temporary `.ps1` script and runs it via `powershell -File`. + +### Structured Reasoning Display + +The system prompt instructs the LLM to prefix its reasoning with `FINDING:` and `DECISION:`. The custom `CleanConsole` output handler parses these and displays them with color-coded labels: + +- **Finding** (green) — what the diagnostic data shows +- **Decision** (yellow) — what the agent will do next and *why* + +This is what distinguishes an agent from a script: the decision points are visible. When the agent skips a step ("adapter is disconnected — IP checks would fail"), applies a fix, or re-runs a diagnostic to verify, you can see the reasoning that drove that choice. --- @@ -253,6 +230,33 @@ agent.connectMcpServer("my_server", { --- +## AgentConfig Reference + +All fields have sensible defaults. Override only what you need: + +```cpp +gaia::AgentConfig config; +config.baseUrl = "http://localhost:8000/api/v1"; +config.maxSteps = 30; +config.debug = true; +``` + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `baseUrl` | `std::string` | `"http://localhost:8000/api/v1"` | LLM server endpoint ([Lemonade Server](https://lemonade-server.ai) recommended; other OpenAI-compatible servers untested) | +| `modelId` | `std::string` | `"Qwen3-4B-GGUF"` | Model identifier sent to the server | +| `maxSteps` | `int` | `20` | Maximum agent loop iterations per query | +| `maxPlanIterations` | `int` | `3` | Maximum plan/replan cycles before forcing completion | +| `maxConsecutiveRepeats` | `int` | `4` | Consecutive identical responses before loop-detection triggers | +| `maxHistoryMessages` | `int` | `40` | Max messages kept between queries (0 = unlimited) | +| `contextSize` | `int` | `16384` | LLM context window size in tokens (`n_ctx`) | +| `debug` | `bool` | `false` | Enable verbose debug logging to stdout | +| `showPrompts` | `bool` | `false` | Print full system prompts and 
LLM responses | +| `silentMode` | `bool` | `false` | Suppress all console output (use `SilentConsole`) | +| `streaming` | `bool` | `false` | Enable streaming responses from the LLM (planned -- not yet implemented) | + +--- + ## Project Structure ``` @@ -265,14 +269,16 @@ cpp/ mcp_client.h # MCP JSON-RPC client (stdio transport) json_utils.h # JSON extraction with multi-strategy fallback console.h # TerminalConsole / SilentConsole output handlers + clean_console.h # CleanConsole — polished TUI with colors and word-wrap src/ agent.cpp # Agent loop state machine tool_registry.cpp mcp_client.cpp # Cross-platform subprocess + pipes json_utils.cpp console.cpp + clean_console.cpp examples/ - simple_agent.cpp # Windows System Health Agent (CUA demo) + health_agent.cpp # Windows System Health Agent (CUA demo) wifi_agent.cpp # Wi-Fi Troubleshooter Agent (registered tools) tests/ test_agent.cpp @@ -280,6 +286,8 @@ cpp/ test_json_utils.cpp test_mcp_client.cpp test_console.cpp + test_clean_console.cpp + test_tool_integration.cpp test_types.cpp ``` @@ -301,35 +309,357 @@ cpp/ --- -## Running Tests +## Error Handling & Recovery + +The framework handles failures at every layer — LLM connection, JSON parsing, and tool execution — so your agent doesn't crash on transient errors. + +### LLM Connection Failures + +If the LLM server is unreachable or returns an error, the agent **retries once automatically**, then exits gracefully: + +``` +Call LLM → fails → retry once → fails again → return error result +``` + +The return value on LLM failure: +```json +{ + "result": "Unable to complete task due to LLM error: Connection refused", + "steps_taken": 1, + "steps_limit": 20 +} +``` + +Your application should check the `result` field — there is no exception to catch. HTTP timeouts: 30s connection, 120s read. + +### Malformed JSON Recovery + +Local LLMs often return imperfect JSON. 
The parser applies **six extraction strategies** in sequence: -```bat -cd cpp/build -ctest -C Release --output-on-failure +1. Direct JSON parse +2. Extract from markdown code blocks (`` ```json ... ``` ``) +3. Bracket-matching — find first complete `{...}` in mixed text +4. Fix common syntax errors (trailing commas, single quotes, missing brackets) +5. Regex extraction of individual fields (`"thought"`, `"tool"`, `"answer"`) +6. Treat entire response as a plain-text conversational answer + +This means the agent recovers from most LLM formatting errors without any intervention. + +### Tool Execution Errors + +When a tool callback throws an exception or returns `{"status": "error", ...}`, the agent enters **error recovery mode**: + +1. The error is captured (exceptions are caught, not propagated) +2. The error context is sent back to the LLM: *"Tool execution failed. Please try an alternative approach."* +3. The LLM reasons about the error and may try a different tool or strategy +4. If the LLM cannot recover within `maxSteps`, the agent returns the last error as the result + +Tool errors never crash the agent. The error flow: + +```cpp +try { + result = tool->callback(args); +} catch (const std::exception& e) { + result = {{"status", "error"}, {"error", "Tool execution failed: " + e.what()}}; +} +// → error context sent to LLM → LLM adapts → loop continues +``` + +### MCP Auto-Reconnect + +If an MCP server disconnects mid-session (process crash, timeout), the agent **reconnects automatically**: + +``` +MCP tool call → fails → reconnect to server → retry tool call → success or return error ``` -Or run the test binary directly: +The subprocess is re-launched and re-initialized. If reconnection fails, the tool call returns an error and the LLM is notified. 
- - - ```bat - cpp\build\Release\gaia_tests.exe --gtest_color=yes - ``` - - - ```bash - ./cpp/build/gaia_tests --gtest_color=yes - ``` - - +### Loop Detection -The test suite covers all six modules: agent loop, tool registry, JSON utilities, MCP client, console output, and types. +The agent detects **infinite tool call loops** — when the LLM calls the same tool with the same arguments 4+ times in a row. When detected, the agent stops and returns: + +``` +"Task stopped due to repeated tool call loop." +``` + +--- + +## Thread Safety + +### Blocking Semantics + +**`processQuery()` is fully blocking.** It runs the complete agent loop (LLM calls, tool executions, history management) on the calling thread and returns only when a final answer is produced or the step limit is reached. + +This means: +- Do **not** call `processQuery()` from a UI thread — it will freeze the UI for the duration of the agent run +- Use a background thread or async wrapper for GUI integration + +### Concurrent Agent Instances + +**Different `Agent` instances are fully independent** and can run in parallel on separate threads. Each agent owns its own conversation history, tool registry, MCP connections, and output handler. + +```cpp +// SAFE — separate instances on separate threads +Agent agent1(config1); +Agent agent2(config2); + +std::thread t1([&] { agent1.processQuery("query 1"); }); +std::thread t2([&] { agent2.processQuery("query 2"); }); +t1.join(); +t2.join(); +``` + +### Single-Agent Rules + +**Do NOT call `processQuery()` concurrently on the same agent instance.** There are no internal locks — concurrent calls will corrupt conversation history and produce undefined behavior. 
+ +```cpp +// NOT SAFE — same instance, two threads +Agent agent(config); +std::thread t1([&] { agent.processQuery("query 1"); }); // race condition +std::thread t2([&] { agent.processQuery("query 2"); }); // race condition +``` + +Similarly, do not call `connectMcpServer()` or `disconnectMcpServer()` while `processQuery()` is running. + +--- + +## Security Model + +### Tool Registration Is Explicit + +Only tools registered via `registerTool()` or discovered from a connected MCP server are available. There is no reflection, auto-discovery, or dynamic code execution. The LLM can only call tools that your code has explicitly registered. + +### Tool Callback Responsibility + +The framework **does not validate tool arguments** before passing them to your callback. Each tool is responsible for: + +- Validating its input parameters (types, ranges, formats) +- Sanitizing paths and shell arguments +- Rejecting unexpected or dangerous inputs + +Example — a safe file-reading tool: + +```cpp +toolRegistry().registerTool("read_file", "Read a text file", + [](const gaia::json& args) -> gaia::json { + std::string path = args.value("path", ""); + + // Validate: reject path traversal + if (path.find("..") != std::string::npos) { + return {{"status", "error"}, {"error", "Path traversal not allowed"}}; + } + + // Validate: restrict to allowed directory + if (path.find("/allowed/dir/") != 0) { + return {{"status", "error"}, {"error", "Access denied"}}; + } + + // Safe to read + std::ifstream f(path); + std::string content((std::istreambuf_iterator<char>(f)), + std::istreambuf_iterator<char>()); + return {{"content", content}}; + }, + {{"path", gaia::ToolParamType::STRING, true, "File path to read"}} +); +``` + +### MCP Server Trust + +MCP servers are **trusted implicitly** — all tools they expose are registered without review. Only connect to MCP servers you control. In production, audit the tool list returned by each server before deployment. 
+ +### Prompt Injection + +The LLM decides which tool to call based on user input and conversation history. A malicious user could craft input that causes the LLM to misuse a tool. Mitigations: + +- **Validate in the tool callback** — don't trust the LLM's argument choices blindly +- **Use restrictive tool descriptions** — describe exactly what the tool does and what arguments it accepts +- **Limit tool scope** — register only the tools needed for your use case +- **Consider confirmation flows** — for destructive operations, require user confirmation before executing + +### Conversation History + +Conversation history persists between `processQuery()` calls on the same agent. Previous queries and tool results are visible to subsequent LLM calls. For multi-user scenarios, create a new `Agent` instance per user session to prevent data leakage. + +--- + +## Production Deployment + +### Binary Sizes + +Measured with MSVC 2022 Release build (x64): + +| Artifact | Size | Notes | +|----------|------|-------| +| `gaia_core.lib` (static) | ~18 MB | Includes statically linked nlohmann_json and cpp-httplib | +| Example executable | ~400-440 KB | Linked against static library | +| Shared library (DLL) | Smaller | Build with `-DBUILD_SHARED_LIBS=ON` — ships only framework code | + +The static library is large because it bundles all dependencies. When building as a shared library (DLL), the binary is significantly smaller since dependencies are linked dynamically. + +### DLL / Shared Library + +The framework supports both static and shared library builds. DLL export macros (`GAIA_API`) are already applied to all public classes: + +```bash +# Build as shared library (DLL on Windows, .so on Linux) +cmake -B build -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release +cmake --build build --config Release +``` + +When consuming the DLL, the `GAIA_API` macro automatically switches from `__declspec(dllexport)` to `__declspec(dllimport)`. 
+ +### Install Targets + +The CMake install target produces a complete SDK package: + +```bash +cmake --install build --prefix /path/to/install +``` + +This creates: +``` +/path/to/install/ + include/gaia/ # All public headers + lib/gaia_core.lib # Library (static or import lib) + lib/cmake/gaia_core/ # CMake config for find_package() + bin/gaia_core.dll # DLL (shared builds only) +``` + +Consumers use `find_package(gaia_core)` to link against the installed SDK. + +### Runtime Configuration + +The LLM endpoint can be configured at runtime via environment variable — no recompilation needed: + +```bash +# Override the default LLM server URL +set LEMONADE_BASE_URL=http://my-server:8080/api/v1 +my_agent.exe +``` + +All other `AgentConfig` fields are set at construction time. For dynamic configuration, read from a config file or registry in your `makeConfig()` function. + +### HTTPS Support + +HTTPS is supported via OpenSSL (enabled by default with `GAIA_ENABLE_SSL=ON`). For local-only deployments where the LLM runs on `localhost`, you can disable SSL to remove the OpenSSL dependency: + +```bash +cmake -B build -DGAIA_ENABLE_SSL=OFF +``` + +--- + +## API Quick Reference + +### Agent + +```cpp +class Agent { +public: + explicit Agent(const AgentConfig& config = {}); + virtual ~Agent(); + + // Main execution — blocking, returns {"result": "...", "steps_taken": N} + json processQuery(const std::string& userInput, int maxSteps = 0); + + // MCP server management + bool connectMcpServer(const std::string& name, const json& config); + void disconnectMcpServer(const std::string& name); + void disconnectAllMcp(); + + // Output handler (for custom UI integration) + OutputHandler& console(); + void setOutputHandler(std::unique_ptr<OutputHandler> handler); + + // Tool registry access + const ToolRegistry& tools() const; + ToolRegistry& toolRegistry(); + + // System prompt + std::string systemPrompt() const; + void rebuildSystemPrompt(); // call after adding tools dynamically + +protected: + 
virtual void registerTools() {} // override to register domain tools + virtual std::string getSystemPrompt() const; // override for agent-specific instructions + void init(); // call at end of subclass constructor +}; +``` + +### ToolRegistry + +```cpp +class ToolRegistry { +public: + void registerTool(const std::string& name, const std::string& description, + ToolCallback callback, std::vector<ToolParam> params = {}, + bool atomic = false); + + json executeTool(const std::string& name, const json& args) const; + + const ToolInfo* findTool(const std::string& name) const; + bool hasTool(const std::string& name) const; + bool removeTool(const std::string& name); + size_t size() const; + void clear(); +}; +``` + +### OutputHandler + +Subclass to integrate agent output with your own UI. All methods are virtual: + +```cpp +class OutputHandler { +public: + virtual void printProcessingStart(const std::string& query, int maxSteps, + const std::string& modelId = "") = 0; + virtual void printStepHeader(int stepNum, int stepLimit) = 0; + virtual void printThought(const std::string& thought) = 0; + virtual void printGoal(const std::string& goal) = 0; + virtual void printToolUsage(const std::string& toolName) = 0; + virtual void printToolComplete() = 0; + virtual void prettyPrintJson(const json& data, const std::string& title = "") = 0; + virtual void printError(const std::string& message) = 0; + virtual void printWarning(const std::string& message) = 0; + virtual void printInfo(const std::string& message) = 0; + virtual void printFinalAnswer(const std::string& answer) = 0; + virtual void printCompletion(int stepsTaken, int stepsLimit) = 0; + // ... plus progress indicators, debug methods +}; +``` + +See the [Customizing Your Agent](/guides/cpp/custom-agent) guide for full OutputHandler examples including headless/embedded usage. 
+ +### MCPClient + +```cpp +class MCPClient { +public: + static MCPClient fromConfig(const std::string& name, const json& config, + int timeout = 30, bool debug = false); + + bool connect(); + void disconnect(); + bool isConnected() const; + + std::vector<ToolInfo> listTools(bool refresh = false); + json callTool(const std::string& toolName, const json& arguments); +}; +``` --- ## Next Steps + + Prerequisites, build steps, and running your first demo agent + + Consume gaia_core in your own CMake project via FetchContent, find_package, or shared library
+ + + ```bat + cd cpp + ``` + ```bat + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release + ``` + ```bat + cmake --build build + ``` + + + ```bash + cd cpp + ``` + ```bash + cmake -B build -DCMAKE_BUILD_TYPE=Release + ``` + ```bash + cmake --build build + ``` + Binaries land in `cpp/build/`. + + + + All dependencies (nlohmann/json, cpp-httplib, Google Test) are fetched automatically by CMake — no manual installs required. + + + + The agent connects to any OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The recommended backend is [Lemonade Server](https://lemonade-server.ai) (optimized for AMD hardware): + + ```bash + lemonade-server serve + ``` + + + **Not using Lemonade?** Any OpenAI-compatible server works — llama.cpp, Ollama, vLLM, or a cloud endpoint. Just set `AgentConfig::baseUrl` and `AgentConfig::modelId`. See the [Integration Guide](/guides/cpp/integration#using-alternative-llm-backends) for configuration examples. + + + + + + + **Option A: Windows CUA demo** (requires MCP server + `uv`): + ```bat + cpp\build\Release\health_agent.exe + ``` + The agent connects to the Windows MCP server, gathers CPU/memory/disk/GPU metrics via PowerShell, and presents results in the console or writes a full report to Notepad. + + **Option B: Wi-Fi Troubleshooter** (no dependencies, run as admin for fix tools): + ```bat + cpp\build\Release\wifi_agent.exe + ``` + Select GPU or NPU backend, then try "Full network diagnostic" or ask a specific question. The agent reasons about each result and adapts its approach in real-time. + + + ```bash + ./cpp/build/health_agent + ``` + The `health_agent` demo compiles but requires the Windows MCP server to function. Try the [Wi-Fi Troubleshooter Agent](/guides/cpp/wifi-agent) instead, or write your own agent with platform-appropriate tools. + + + + Type `quit` to exit. 
+ + + Also try the [Wi-Fi Troubleshooter Agent](/guides/cpp/wifi-agent) — a pure registered-tool agent that diagnoses and fixes network issues without MCP. + + + + +## Running Tests + +```bat +cd cpp/build +``` +```bat +ctest -C Release --output-on-failure +``` + +Or run the test binary directly: + + + + ```bat + cpp\build\Release\gaia_tests.exe --gtest_color=yes + ``` + + + ```bash + ./cpp/build/gaia_tests --gtest_color=yes + ``` + + + +The test suite covers all eight modules: agent loop, tool registry, JSON utilities, MCP client, console output, clean console, tool integration, and types. + +## Next Steps + + + + Architecture, agent execution flow, how tools work, and project structure + + + + Add gaia_core to your own CMake project via FetchContent, find_package, or shared library + + + + Custom prompts, typed tools, MCP servers, output capture, and AgentConfig tuning + + + + Full network diagnostic and auto-fix using only registered C++ tools + + + +--- + + + +**License** + +Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + +SPDX-License-Identifier: MIT + + diff --git a/docs/guides/cpp/wifi-agent.mdx b/docs/guides/cpp/wifi-agent.mdx index 4c9e0d3f8..97bf0e947 100644 --- a/docs/guides/cpp/wifi-agent.mdx +++ b/docs/guides/cpp/wifi-agent.mdx @@ -10,7 +10,7 @@ icon: "wifi" **Platform:** Windows (PowerShell network commands). Compiles on Linux/macOS for CI but tools require Windows to return real data. -**Prerequisite:** [Lemonade Server](/setup) running with a model loaded. +**Prerequisite:** [Lemonade Server](https://lemonade-server.ai) running with a model loaded. --- @@ -24,7 +24,7 @@ Here's what makes it interesting: 1. **It runs real commands** — not simulated. Every tool executes an actual PowerShell command on your machine and returns the real output. 2. **The LLM decides what to do** — the agent doesn't follow a hardcoded if/else tree. The LLM reads each tool's output, reasons about it, and decides the next step. 3. 
**It's pure C++** — no Python interpreter, no MCP server subprocess, no external dependencies. Just a compiled binary talking to a local LLM. -4. **Everything is local** — the LLM runs on your AMD hardware via Lemonade Server. No data leaves your machine. +4. **Everything is local** — the LLM runs on your AMD hardware via [Lemonade Server](https://lemonade-server.ai). No data leaves your machine. --- @@ -102,13 +102,22 @@ static std::string runShell(const std::string& command) { std::string result; std::array buffer; + struct PipeCloser { + void operator()(FILE* f) const { #ifdef _WIN32 - std::unique_ptr pipe( - _popen(fullCmd.c_str(), "r"), _pclose); + if (f) _pclose(f); #else - std::unique_ptr pipe( - popen(fullCmd.c_str(), "r"), pclose); + if (f) pclose(f); #endif + } + }; + std::unique_ptr pipe( +#ifdef _WIN32 + _popen(fullCmd.c_str(), "r") +#else + popen(fullCmd.c_str(), "r") +#endif + ); if (!pipe) { return "{\"error\": \"Failed to execute command\"}"; @@ -129,7 +138,7 @@ static std::string runShell(const std::string& command) { **Why `_popen` / `popen`?** These are the C runtime functions for spawning a child process and reading its stdout as a file stream. The `#ifdef _WIN32` blocks keep the code cross-platform — it compiles on Linux for CI, even though the PowerShell commands only work on Windows. -**Why `std::unique_ptr` with a custom deleter?** This ensures the pipe is always closed properly, even if an exception occurs. The deleter (`_pclose` or `pclose`) runs automatically when the `unique_ptr` goes out of scope. +**Why a `PipeCloser` struct?** The `std::unique_ptr` with a custom deleter ensures the pipe is always closed properly, even if an exception occurs. The `PipeCloser` functor calls `_pclose` (Windows) or `pclose` (POSIX) automatically when the `unique_ptr` goes out of scope. 
--- @@ -171,13 +180,16 @@ toolRegistry().registerTool( if (host.empty()) { return {{"error", "host parameter is required"}}; } + if (!isSafeShellArg(host)) { // reject shell metacharacters + return {{"error", "Invalid host — contains disallowed characters"}}; + } std::string cmd = "Test-NetConnection -ComputerName " + host + " | Select-Object ComputerName, RemoteAddress, " "PingSucceeded, PingReplyDetails" + " | ConvertTo-Json"; std::string output = runShell(cmd); - return {{"tool", "ping_host"}, {"host", host}, {"output", output}}; + return {{"tool", "ping_host"}, {"command", cmd}, {"host", host}, {"output", output}}; }, { {"host", gaia::ToolParamType::STRING, /*required=*/true, @@ -235,6 +247,25 @@ toolRegistry().registerTool( **Pattern:** Use `args.value("key", "")` from nlohmann/json — it returns the default if the key is missing, which handles optional parameters cleanly. +#### Input Validation: Preventing Shell Injection + +Since tool arguments come from the LLM (which could hallucinate or be prompt-injected), the agent validates all string arguments before passing them to shell commands: + +```cpp +static bool isSafeShellArg(const std::string& arg) { + for (char c : arg) { + if (c == ';' || c == '|' || c == '&' || c == '`' || c == '$' + || c == '(' || c == ')' || c == '{' || c == '}' || c == '<' + || c == '>' || c == '"' || c == '\n' || c == '\r') { + return false; + } + } + return !arg.empty(); +} +``` + +This rejects shell metacharacters that could escape the intended command. Every tool that accepts LLM-provided arguments (`ping_host`, `test_dns_resolution`, `set_dns_servers`, `restart_wifi_adapter`, `enable_wifi_adapter`) calls `isSafeShellArg()` before constructing the shell command. **Always validate LLM-provided arguments before passing them to shell commands.** + --- ### 3. The System Prompt: Teaching the Agent How to Think @@ -245,28 +276,33 @@ The system prompt is where you define the agent's behavior. 
Without it, the LLM std::string getSystemPrompt() const override { return R"(You are an expert Windows network troubleshooter... -## DIAGNOSTIC PROTOCOL (follow this order) - -1. **Adapter Check** — call `check_adapter` -2. **IP Configuration** — call `check_ip_config` -3. **Gateway Ping** — call `ping_host` with the gateway IP from step 2 -4. **DNS Resolution** — call `test_dns_resolution` -5. **Internet Connectivity** — call `test_internet` - -## FIX PROTOCOL (apply only if diagnostics reveal issues) - -- **No IP / DHCP failure** → call `renew_dhcp_lease` -- **DNS failure** → call `flush_dns_cache`, re-test; if still failing, - call `set_dns_servers` with Google DNS (8.8.8.8 / 8.8.4.4) -- **Adapter disconnected** → call `restart_wifi_adapter` -- After any fix, re-run the relevant diagnostic to confirm - -## OUTPUT FORMAT - -End every response with one of: -- **RESOLVED** — all diagnostics pass or issue was fixed -- **PARTIALLY RESOLVED** — some issues fixed but others remain -- **NEEDS MANUAL ACTION** — requires user intervention +## AVAILABLE DIAGNOSTIC SEQUENCE + +For a full network diagnostic, the typical sequence is: +1. `check_adapter` — adapter present and connected? +2. `check_ip_config` — valid IP, gateway, DNS servers? +3. `ping_host` — gateway reachable? +4. `test_dns_resolution` — name resolution working? +5. `test_internet` — end-to-end connectivity? +6. `test_bandwidth` — download and upload speed acceptable? + +## FIXING ISSUES + +When you find a problem, fix it and verify: +1. Apply the fix +2. Re-run the diagnostic that failed to verify the fix worked +3. If the fix failed, try the next option + +## FINAL ANSWER + +Only provide an "answer" after ALL tool calls are complete. 
+Format as a bulleted summary: +- Adapter: OK/FAIL - SSID name, signal strength % +- IP Config: OK/FAIL - IP address, gateway +- DNS: OK/FAIL - resolver working/not +- Internet: OK/FAIL - connectivity status +- Speed: download XX Mbps / upload XX Mbps +- Status: RESOLVED / PARTIALLY RESOLVED / NEEDS MANUAL ACTION ...)"; } ``` @@ -317,17 +353,17 @@ The system prompt instructs the LLM to follow this decision tree. The LLM doesn' ```mermaid %%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#ED1C24', 'fontSize':'14px', 'lineColor':'#ED1C24', 'primaryTextColor':'#fff', 'edgeLabelBackground':'#1a1a1a'}, 'flowchart': {'curve': 'basis'}}}%% flowchart TD - A["1. check_adapter\nWi-Fi adapter present?"] -->|Yes| B["2. check_ip_config\nIP address via DHCP?"] + A["1. check_adapter
Wi-Fi adapter present?"] -->|Yes| B["2. check_ip_config
IP address via DHCP?"] A -->|"Radio Off"| F4["FIX: toggle_wifi_radio"] A -->|"Disabled"| F5["FIX: enable_wifi_adapter"] A -->|"Stuck"| F1["FIX: restart_wifi_adapter"] - B -->|Valid IP| C["3. ping_host\nGateway reachable?"] + B -->|Valid IP| C["3. ping_host
Gateway reachable?"] B -->|No IP / APIPA| F2["FIX: renew_dhcp_lease"] - C -->|Reachable| D["4. test_dns_resolution\nDNS resolving?"] + C -->|Reachable| D["4. test_dns_resolution
DNS resolving?"] C -->|Unreachable| F1 - D -->|Resolving| E["5. test_internet\nPort 443 open?"] - D -->|Failing| F3["FIX: flush_dns_cache\nthen set_dns_servers"] - E -->|Connected| BW["6. test_bandwidth\nSpeed test"] + D -->|Resolving| E["5. test_internet
Port 443 open?"] + D -->|Failing| F3["FIX: flush_dns_cache
then set_dns_servers"] + E -->|Connected| BW["6. test_bandwidth
Speed test"] E -->|Failed| M["NEEDS MANUAL ACTION"] BW --> R["RESOLVED"] F1 --> A @@ -397,7 +433,7 @@ private: ## The Interactive Loop -The `main()` function follows the same pattern as the `simple_agent` example: +The `main()` function follows the same pattern as the `health_agent` example: ```cpp int main() { @@ -408,18 +444,33 @@ int main() { WiFiTroubleshooterAgent agent(modelId); - // Interactive loop with diagnostic menu + // Predefined menu options (number → prompt) + const std::vector> kDiagnosticMenu = { + {"Full network diagnostic", "Run a full network diagnostic."}, + {"Check Wi-Fi adapter", "Check my Wi-Fi adapter status."}, + // ... more options ... + }; + std::string userInput; while (true) { - // Show numbered menu: [1] Full diagnostic, [2] Check adapter, ... - printDiagnosticMenu(); + // Show numbered menu + for (size_t i = 0; i < kDiagnosticMenu.size(); ++i) + std::cout << " [" << i+1 << "] " << kDiagnosticMenu[i].first << "\n"; std::cout << " > " << std::flush; std::getline(std::cin, userInput); if (userInput == "quit" || userInput == "exit" || userInput == "q") break; + if (userInput.empty()) continue; + + // Map numbered selection to prompt, or use free-text input directly + std::string query; + if (userInput.size() == 1 && userInput[0] >= '1' + && userInput[0] <= '0' + static_cast(kDiagnosticMenu.size())) { + query = kDiagnosticMenu[userInput[0] - '1'].second; + } else { + query = userInput; + } - // Map numbered selection to pre-written prompt, or use free-text - std::string query = mapMenuSelection(userInput); auto result = agent.processQuery(query); // Final answer is printed by CleanConsole::printFinalAnswer() (void)result; @@ -444,7 +495,11 @@ int main() { ```bat cd cpp + ``` + ```bat cmake -B build -G "Visual Studio 17 2022" -A x64 + ``` + ```bat cmake --build build --config Release ``` Binary: `cpp\build\Release\wifi_agent.exe` @@ -452,14 +507,22 @@ int main() { ```bat cd cpp + ``` + ```bat cmake -B build -G Ninja 
-DCMAKE_BUILD_TYPE=Release + ``` + ```bat cmake --build build ``` ```bash cd cpp + ``` + ```bash cmake -B build -DCMAKE_BUILD_TYPE=Release + ``` + ```bash cmake --build build ``` @@ -554,7 +617,7 @@ int main() { ### Why registered tools instead of MCP? -The `simple_agent` example uses MCP — it connects to `uvx windows-mcp` which runs as a separate subprocess and exposes PowerShell as an MCP tool. That works, but adds a dependency on `uvx`, Python, and the MCP server package. +The `health_agent` example uses MCP — it connects to `uvx windows-mcp` which runs as a separate subprocess and exposes PowerShell as an MCP tool. That works, but adds a dependency on `uvx`, Python, and the MCP server package. The Wi-Fi agent takes the opposite approach: every tool is a C++ lambda registered directly with the agent. The PowerShell commands are hardcoded strings. This means: diff --git a/docs/roadmap.mdx b/docs/roadmap.mdx index 4e6c84e56..8eb584720 100644 --- a/docs/roadmap.mdx +++ b/docs/roadmap.mdx @@ -41,6 +41,7 @@ graph TD F("MCP Docs Server
AI assistant context") G("AI PC Agents Hub
Discover & compete") V("Vision SDK
Document processing pipeline") + CPP("C++ Framework
Production readiness") end subgraph Q3["Q3 2026"] @@ -53,6 +54,7 @@ graph TD C --> C2 C2 --> D D --> F + D --> CPP F --> V V --> G G --> H @@ -66,6 +68,7 @@ graph TD style V fill:#ED1C24,stroke:#C8171E,stroke-width:2px,color:#fff style F fill:#ED1C24,stroke:#C8171E,stroke-width:2px,color:#fff style G fill:#ED1C24,stroke:#C8171E,stroke-width:2px,color:#fff + style CPP fill:#ED1C24,stroke:#C8171E,stroke-width:2px,color:#fff style H fill:#2d2d2d,stroke:#555,stroke-width:2px,color:#fff style Q1 fill:transparent,stroke:#F4484D,stroke-width:2px,stroke-dasharray: 5 5,color:#F4484D @@ -255,6 +258,38 @@ Key capabilities: → [View detailed plan](/plans/vision-sdk) • [Vote with 👍 on GitHub](https://github.com/amd/gaia/issues/325) +### Q2 2026: C++ Framework Production Readiness + +**Make the native C++ agent framework ready for third-party integration** + +The [C++ Agent Framework](/guides/cpp) shipped in Q1 2026 as a native C++17 port of the base agent — same agent loop, tool registry, and MCP support, compiled to a standalone binary with no Python runtime. 
The next phase focuses on production readiness: + +```cpp +// Cancellation support +gaia::CancellationToken token; +std::thread([&] { agent.processQuery("diagnose network", 20, token); }).detach(); +token.cancel(); // graceful stop from UI thread + +// Structured event callbacks for GUI integration +agent.addEventListener(std::make_shared()); + +// Tool security policies +toolRegistry().registerTool("restart_service", desc, callback, params, + gaia::ToolPolicy::CONFIRM); +``` + +Key deliverables: +- **Cancellation & timeout** — `CancellationToken` for graceful abort, configurable HTTP timeouts +- **Event callback system** — typed `AgentEvent` structs for embedding in desktop applications (WPF, Qt, Electron) +- **Tool security** — per-tool allow/confirm/deny policies, argument validation callbacks, path traversal utilities +- **API versioning** — semantic versioning, stability guarantees, deprecation lifecycle +- **Performance benchmarks** — binary size tracking, loop latency, memory footprint, CI regression detection +- **Pluggable logging** — `Logger` interface for ETW, syslog, or custom telemetry +- **Streaming responses** — SSE parsing, token-by-token callbacks for responsive UIs +- **Runtime configuration** — JSON config files, environment variables, dynamic model switching + +→ [View C++ framework docs](/guides/cpp) • [View milestone on GitHub](https://github.com/amd/gaia/milestone/10) + ### Q2 2026: MCP Docs Server **AI assistant context for GAIA development** @@ -354,4 +389,4 @@ The more votes an issue gets, the higher priority it becomes. Your input directl --- -*Updated: January 26, 2026* +*Updated: February 27, 2026*