diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index ee05206..99fd276 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a10b2c8..79ff8ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.releaserc.yaml b/.releaserc.yaml index 69f80d5..2ba3cd2 100644 --- a/.releaserc.yaml +++ b/.releaserc.yaml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.well-known/agent-card.json b/.well-known/agent-card.json index 029e4d0..211fad1 100644 --- a/.well-known/agent-card.json +++ b/.well-known/agent-card.json @@ -68,13 +68,6 @@ "description": "Wait for specific conditions before proceeding with automation", "tags": ["wait","synchronization","timing","playwright"], "schema": {"properties":{"condition":{"description":"Type of condition (selector, navigation, function, timeout, networkidle)","type":"string"},"custom_function":{"description":"Custom JavaScript function to evaluate for 'function' condition","type":"string"},"selector":{"description":"Selector to wait for if condition is 'selector'","type":"string"},"state":{"default":"visible","description":"State to wait for (visible, hidden, attached, detached)","type":"string"},"timeout":{"default":30000,"description":"Maximum time to wait in milliseconds","type":"integer"}},"required":["condition"],"type":"object"} - }, - { - "id": "write_to_csv", - "name": "write_to_csv", - "description": "Write structured data to CSV files with support for custom headers and file paths", - "tags": ["export","csv","data","file"], - "schema": {"properties":{"append":{"default":false,"description":"Whether to append to existing file or create new file","type":"boolean"},"data":{"description":"Array of objects to write to CSV, each object represents a row","items":{"type":"object"},"type":"array"},"filename":{"description":"Name of the CSV file (without path, will be saved to configured data directory)","type":"string"},"headers":{"description":"Custom column headers for the CSV file (optional, will use object keys if not provided)","items":{"type":"string"},"type":"array"},"include_headers":{"default":true,"description":"Whether to include headers in the CSV output","type":"boolean"}},"required":["data","filename"],"type":"object"} } ] } diff --git a/AGENTS.md b/AGENTS.md index cddf862..31b9c8f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,7 +30,7 @@ This agent is built using the Agent Definition Language (ADL) and provides A2A c -**System Prompt**: You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. +**System Prompt**: You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads @@ -41,6 +41,7 @@ Your core capabilities include: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content +9. **Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) @@ -63,6 +64,11 @@ When helping users: - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability +**IMPORTANT - Artifact Creation**: +When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. + +For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). + Your automation solutions should be maintainable, efficient, and production-ready. @@ -75,7 +81,7 @@ Your automation solutions should be maintainable, efficient, and production-read ## Skills -This agent provides 9 skills: +This agent provides 8 skills: ### navigate_to_url @@ -134,13 +140,6 @@ This agent provides 9 skills: - **Output Schema**: Defined in agent configuration -### write_to_csv -- **Description**: Write structured data to CSV files with support for custom headers and file paths -- **Tags**: export, csv, data, file -- **Input Schema**: Defined in agent configuration -- **Output Schema**: Defined in agent configuration - - ## Server Configuration @@ -246,11 +245,6 @@ curl -X POST http://localhost:8080/skills/wait_for_condition \ -H "Content-Type: application/json" \ -d '{"input": "your_input_here"}' -# Execute write_to_csv skill -curl -X POST http://localhost:8080/skills/write_to_csv \ - -H "Content-Type: application/json" \ - -d '{"input": "your_input_here"}' - ``` @@ -298,8 +292,6 @@ docker run -p 8080:8080 browser-agent │ └── wait_for_condition.go # Wait for specific conditions before proceeding with automation -│ └── write_to_csv.go # Write structured data to CSV files with support for custom headers and file paths - ├── .well-known/ # Agent configuration │ └── agent-card.json # Agent metadata ├── go.mod # Go module definition diff --git a/CLAUDE.md b/CLAUDE.md index 5726ee4..1d7f10f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ browser-agent is an A2A (Agent-to-Agent) server implementing the [A2A Protocol]( ### ADL-Generated Structure -The codebase is generated using ADL CLI 0.23.2 and follows a strict generation pattern: +The codebase is generated using ADL CLI 0.23.6 and follows a strict generation pattern: - **Generated Files**: Marked with `DO NOT EDIT` headers - manual changes will be overwritten - **Configuration Source**: `agent.yaml` - defines agent capabilities, skills, and metadata - **Server Implementation**: Built on the ADK (Agent Development Kit) framework from `github.com/inference-gateway/adk` @@ -82,7 +82,6 @@ The following skills are currently defined: - **execute_script**: Execute custom JavaScript code in the browser context - **handle_authentication**: Handle various authentication scenarios including basic auth, OAuth, and custom login forms - **wait_for_condition**: Wait for specific conditions before proceeding with automation -- **write_to_csv**: Write structured data to CSV files with support for custom headers and file paths To modify skills: 1. Update `agent.yaml` with skill definitions @@ -118,7 +117,7 @@ Activate with: `flox activate` (if Flox is installed) - **Generated Files**: Never manually edit files with "DO NOT EDIT" headers - **Configuration Changes**: Always modify `agent.yaml` and regenerate -- **ADL Version**: Ensure ADL CLI 0.23.2 or compatible version for regeneration +- **ADL Version**: Ensure ADL CLI 0.23.6 or compatible version for regeneration - **Port Configuration**: Default 8080, configurable via `A2A_PORT` or `A2A_SERVER_PORT` ## Debugging Tips diff --git a/README.md b/README.md index e6dc67c..a1047a8 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ docker run -p 8080:8080 browser-agent | `execute_script` | Execute custom JavaScript code in the browser context |args, return_value, script | | `handle_authentication` | Handle various authentication scenarios including basic auth, OAuth, and custom login forms |login_url, password, password_selector, submit_selector, type, username, username_selector | | `wait_for_condition` | Wait for specific conditions before proceeding with automation |condition, custom_function, selector, state, timeout | -| `write_to_csv` | Write structured data to CSV files with support for custom headers and file paths |append, data, filename, headers, include_headers | ## Configuration diff --git a/Taskfile.yml b/Taskfile.yml index 7406c65..582a0fa 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/agent.yaml b/agent.yaml index afa3e56..91f02f3 100644 --- a/agent.yaml +++ b/agent.yaml @@ -310,50 +310,12 @@ spec: inject: - logger - playwright - - id: write_to_csv - name: write_to_csv - description: Write structured data to CSV files with support for custom headers and file paths - tags: - - export - - csv - - data - - file - schema: - type: object - properties: - data: - type: array - items: - type: object - description: Array of objects to write to CSV, each object represents a row - filename: - type: string - description: Name of the CSV file (without path, will be saved to configured data directory) - headers: - type: array - items: - type: string - description: Custom column headers for the CSV file (optional, will use object keys if not provided) - append: - type: boolean - description: Whether to append to existing file or create new file - default: false - include_headers: - type: boolean - description: Whether to include headers in the CSV output - default: true - required: - - data - - filename - inject: - - logger - - playwright agent: provider: "" model: "" systemPrompt: | - You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. - + You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. + Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads 2. **Element Interaction**: Click buttons, fill forms, select dropdowns, and interact with any web element @@ -363,7 +325,8 @@ spec: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content - + 9. **Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports + Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) - Selector strategies (CSS, XPath, text, accessibility) @@ -375,7 +338,7 @@ spec: - File uploads and downloads - Network interception and modification - Mobile and responsive testing - + When helping users: - Always use robust selectors that won't break easily - Implement proper wait strategies for dynamic content @@ -384,7 +347,12 @@ spec: - Consider accessibility and best practices - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability - + + **IMPORTANT - Artifact Creation**: + When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. + + For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). + Your automation solutions should be maintainable, efficient, and production-ready. services: playwright: diff --git a/config/config.go b/config/config.go index 76c3da0..ef6cb77 100644 --- a/config/config.go +++ b/config/config.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml index 5c4e1bc..6fe0526 100644 --- a/example/docker-compose.yaml +++ b/example/docker-compose.yaml @@ -53,6 +53,7 @@ services: A2A_AGENT_CLIENT_MAX_CHAT_COMPLETION_ITERATIONS: 20 A2A_AGENT_CLIENT_MAX_TOKENS: 4096 A2A_AGENT_CLIENT_TEMPERATURE: 0.7 + A2A_AGENT_CLIENT_TOOLS_CREATE_ARTIFACT: true A2A_CAPABILITIES_STREAMING: true A2A_CAPABILITIES_PUSH_NOTIFICATIONS: false A2A_CAPABILITIES_STATE_TRANSITION_HISTORY: false diff --git a/go.mod b/go.mod index b4cf915..aae80cd 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/inference-gateway/browser-agent go 1.25 require ( - github.com/inference-gateway/adk v0.14.0 + github.com/inference-gateway/adk v0.15.0 github.com/playwright-community/playwright-go v0.5200.1 github.com/sethvargo/go-envconfig v1.3.0 github.com/stretchr/testify v1.10.0 diff --git a/go.sum b/go.sum index c0d31af..4c64438 100644 --- a/go.sum +++ b/go.sum @@ -65,8 +65,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/inference-gateway/adk v0.14.0 h1:CauhKjXGGNJU+ICqHk4K0ysftcHNuFJvQ+Z4lcUS7mI= -github.com/inference-gateway/adk v0.14.0/go.mod h1:Eh91HM5d3R0I5OOAh3YNUqZCJBBdGPHrKBALnVL8dl0= +github.com/inference-gateway/adk v0.15.0 h1:KFBrZEOHlcTQ2g2ZJCcGyq7y6eM2/iOwDWKH99BEJKM= +github.com/inference-gateway/adk v0.15.0/go.mod h1:Eh91HM5d3R0I5OOAh3YNUqZCJBBdGPHrKBALnVL8dl0= github.com/inference-gateway/sdk v1.10.0 h1:88m1XTS5J7Q9+sFaKXKHAPXdDpji6SASXVWz2pe8ZFk= github.com/inference-gateway/sdk v1.10.0/go.mod h1:3TTD7Kbr7FRt+9ZbCPAm3u0tXUIWx7flZuwrRgZgrdk= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= diff --git a/internal/logger/logger.go b/internal/logger/logger.go index 669a094..5a913ff 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/main.go b/main.go index 295a24e..77badbf 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. @@ -44,14 +44,15 @@ func main() { l.Info("starting " + AgentName + " agent (version: " + Version + ", environment: " + cfg.Environment + ")") - toolBox := server.NewDefaultToolBox() - // Initialize services playwrightSvc, err := playwright.NewPlaywrightService(l, &cfg) if err != nil { l.Fatal("failed to initialize playwright service", zap.Error(err)) } + // Create toolbox with default tools (like input_required, create_artifact etc) + toolBox := server.NewDefaultToolBox(&cfg.A2A.AgentConfig.ToolBoxConfig) + // Register navigate_to_url skill navigateToURLSkill := skills.NewNavigateToURLSkill(l, playwrightSvc) toolBox.AddTool(navigateToURLSkill) @@ -92,11 +93,6 @@ func main() { toolBox.AddTool(waitForConditionSkill) l.Info("registered skill: wait_for_condition (Wait for specific conditions before proceeding with automation)") - // Register write_to_csv skill - writeToCsvSkill := skills.NewWriteToCsvSkill(l, playwrightSvc) - toolBox.AddTool(writeToCsvSkill) - l.Info("registered skill: write_to_csv (Write structured data to CSV files with support for custom headers and file paths)") - llmClient, err := server.NewOpenAICompatibleLLMClient(&cfg.A2A.AgentConfig, l) if err != nil { l.Fatal("failed to create LLM client", zap.Error(err)) @@ -107,7 +103,7 @@ func main() { WithLLMClient(llmClient). WithToolBox(toolBox). WithMaxChatCompletion(cfg.A2A.AgentConfig.MaxChatCompletionIterations). - WithSystemPrompt(`You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. + WithSystemPrompt(`You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads @@ -118,6 +114,7 @@ Your core capabilities include: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content +9. **Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) @@ -140,6 +137,11 @@ When helping users: - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability +**IMPORTANT - Artifact Creation**: +When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. + +For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). + Your automation solutions should be maintainable, efficient, and production-ready. `). Build() @@ -147,24 +149,36 @@ Your automation solutions should be maintainable, efficient, and production-read l.Fatal("failed to create agent", zap.Error(err)) } + artifactService, err := server.NewArtifactService(&cfg.A2A.ArtifactsConfig, l) + if err != nil { + l.Warn("artifact service could not be created - check ARTIFACTS_ENABLE environment variable", zap.Error(err)) + l.Info("continuing without artifact service support") + artifactService = nil + } + artifactsServer, err := server. NewArtifactsServerBuilder(&cfg.A2A.ArtifactsConfig, l). Build() if err != nil { - l.Warn("artifacts server could not be created - check ARTIFACTS_ENABLE environment variable", zap.Error(err)) - l.Info("continuing without artifacts server support") + l.Warn("artifacts server could not be created", zap.Error(err)) + l.Info("continuing without artifacts server") artifactsServer = nil } - a2aServer, err := server.NewA2AServerBuilder(cfg.A2A, l). + serverBuilder := server.NewA2AServerBuilder(cfg.A2A, l). WithAgent(agent). WithAgentCardFromFile(".well-known/agent-card.json", map[string]any{ "name": AgentName, "version": Version, "description": AgentDescription, "url": cfg.A2A.AgentURL, - }). - WithArtifactStorage(artifactsServer.GetStorage()). + }) + + if artifactService != nil { + serverBuilder = serverBuilder.WithArtifactService(artifactService) + } + + a2aServer, err := serverBuilder. WithDefaultBackgroundTaskHandler(). WithDefaultStreamingTaskHandler(). Build() diff --git a/skills/take_screenshot.go b/skills/take_screenshot.go index e260114..4ea9e6e 100644 --- a/skills/take_screenshot.go +++ b/skills/take_screenshot.go @@ -63,17 +63,6 @@ func NewTakeScreenshotSkill(logger *zap.Logger, playwright playwright.BrowserAut // TakeScreenshotHandler handles the take_screenshot skill execution func (s *TakeScreenshotSkill) TakeScreenshotHandler(ctx context.Context, args map[string]any) (string, error) { - artifactHelper, ok := ctx.Value(server.ArtifactHelperContextKey).(*server.ArtifactHelper) - if !ok { - s.logger.Warn("unable to get artifact helper from context") - return "", fmt.Errorf("artifact helper not available in context") - } - - task, ok := ctx.Value(server.TaskContextKey).(*types.Task) - if !ok { - s.logger.Warn("unable to get task from context") - return "", fmt.Errorf("task not available in context") - } generatedPath, err := s.generateDeterministicPath(args) if err != nil { @@ -132,62 +121,49 @@ func (s *TakeScreenshotSkill) TakeScreenshotHandler(ctx context.Context, args ma return "", fmt.Errorf("screenshot failed: %w", err) } - screenshotData, err := os.ReadFile(generatedPath) - if err != nil { - s.logger.Error("failed to read screenshot file", zap.String("path", generatedPath), zap.Error(err)) - return "", fmt.Errorf("failed to read screenshot file: %w", err) - } + s.logger.Info("screenshot completed successfully", + zap.String("sessionID", session.ID), + zap.String("path", generatedPath)) - metadata, err := s.getScreenshotMetadata(generatedPath, fullPage, selector, imageType, quality) + artifactURL, artifactID, err := s.createArtifactFromScreenshot(ctx, generatedPath, imageType) if err != nil { - s.logger.Warn("failed to get screenshot metadata", zap.Error(err)) - } - - mimeType := s.getMimeType(imageType) - filename := filepath.Base(generatedPath) - - screenshotArtifact := artifactHelper.CreateFileArtifactFromBytes( - fmt.Sprintf("Screenshot: %s", filename), - fmt.Sprintf("Screenshot captured from browser session %s", session.ID), - filename, - screenshotData, - &mimeType, - ) - - if metadata != nil { - screenshotArtifact.Metadata = metadata - } - - artifactHelper.AddArtifactToTask(task, screenshotArtifact) - s.logger.Info("artifact added to task", - zap.String("taskID", task.ID), - zap.String("artifactID", screenshotArtifact.ArtifactID)) + s.logger.Debug("artifact creation skipped or failed, returning file path only", + zap.Error(err), + zap.String("path", generatedPath)) + + response := map[string]any{ + "success": true, + "path": generatedPath, + "filename": filepath.Base(generatedPath), + "full_page": fullPage, + "type": imageType, + "quality": quality, + "selector": selector, + "session_id": session.ID, + "timestamp": s.getCurrentTimestamp(), + "message": fmt.Sprintf("Screenshot captured successfully and saved to %s", generatedPath), + } - if err := os.Remove(generatedPath); err != nil { - s.logger.Warn("failed to clean up temporary screenshot file", - zap.String("path", generatedPath), - zap.Error(err)) - } else { - s.logger.Debug("cleaned up temporary screenshot file", zap.String("path", generatedPath)) + responseJSON, err := json.Marshal(response) + if err != nil { + return "", fmt.Errorf("failed to marshal response: %w", err) + } + return string(responseJSON), nil } - s.logger.Info("screenshot completed successfully", - zap.String("sessionID", session.ID), - zap.String("artifactID", screenshotArtifact.ArtifactID), - zap.Int("fileSize", len(screenshotData))) - response := map[string]any{ "success": true, - "filename": filename, + "path": generatedPath, + "filename": filepath.Base(generatedPath), "full_page": fullPage, "type": imageType, "quality": quality, "selector": selector, "session_id": session.ID, - "artifact_id": screenshotArtifact.ArtifactID, - "file_size": len(screenshotData), "timestamp": s.getCurrentTimestamp(), - "message": "Screenshot captured successfully and stored as artifact", + "artifact_id": artifactID, + "url": artifactURL, + "message": fmt.Sprintf("Screenshot captured successfully. Download URL: %s", artifactURL), } responseJSON, err := json.Marshal(response) @@ -256,36 +232,51 @@ func (s *TakeScreenshotSkill) getOrCreateSession(ctx context.Context) (*playwrig return s.playwright.GetOrCreateDefaultSession(ctx) } -// getScreenshotMetadata extracts metadata about the screenshot file -func (s *TakeScreenshotSkill) getScreenshotMetadata(path string, fullPage bool, selector, imageType string, quality int) (map[string]any, error) { - fileInfo, err := os.Stat(path) - if err != nil { - return nil, fmt.Errorf("failed to get file info: %w", err) +// getCurrentTimestamp returns the current timestamp in RFC3339 format +func (s *TakeScreenshotSkill) getCurrentTimestamp() string { + return time.Now().Format(time.RFC3339) +} + +// createArtifactFromScreenshot creates an artifact from the screenshot file +func (s *TakeScreenshotSkill) createArtifactFromScreenshot(ctx context.Context, filePath, imageType string) (url string, artifactID string, err error) { + task, ok := ctx.Value(server.TaskContextKey).(*types.Task) + if !ok { + return "", "", fmt.Errorf("task not found in context") } - metadata := map[string]any{ - "file_size": fileInfo.Size(), - "created_at": fileInfo.ModTime().Format(time.RFC3339), - "permissions": fileInfo.Mode().String(), - "full_page": fullPage, - "image_type": imageType, - "quality": quality, - "capture_type": "viewport", + artifactService, ok := ctx.Value(server.ArtifactServiceContextKey).(server.ArtifactService) + if !ok || artifactService == nil { + return "", "", fmt.Errorf("artifact service not available") } - if fullPage { - metadata["capture_type"] = "full_page" + data, err := os.ReadFile(filePath) + if err != nil { + return "", "", fmt.Errorf("failed to read screenshot file: %w", err) } - if selector != "" { - metadata["capture_type"] = "element" - metadata["selector"] = selector + mimeType := s.getMimeType(imageType) + + filename := filepath.Base(filePath) + artifact, err := artifactService.CreateFileArtifact( + fmt.Sprintf("Screenshot - %s", filename), + fmt.Sprintf("Screenshot captured at %s", s.getCurrentTimestamp()), + filename, + data, + &mimeType, + ) + if err != nil { + return "", "", fmt.Errorf("failed to create artifact: %w", err) } - return metadata, nil -} + artifactService.AddArtifactToTask(task, artifact) -// getCurrentTimestamp returns the current timestamp in RFC3339 format -func (s *TakeScreenshotSkill) getCurrentTimestamp() string { - return time.Now().Format(time.RFC3339) + if len(artifact.Parts) > 0 { + if filePart, ok := artifact.Parts[0].(types.FilePart); ok { + if fileWithURI, ok := filePart.File.(types.FileWithUri); ok { + return fileWithURI.URI, artifact.ArtifactID, nil + } + } + } + + return "", artifact.ArtifactID, nil } diff --git a/skills/take_screenshot_test.go b/skills/take_screenshot_test.go index a90d23b..940039d 100644 --- a/skills/take_screenshot_test.go +++ b/skills/take_screenshot_test.go @@ -9,8 +9,6 @@ import ( "testing" "time" - server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" config "github.com/inference-gateway/browser-agent/config" playwright "github.com/inference-gateway/browser-agent/internal/playwright" mocks "github.com/inference-gateway/browser-agent/internal/playwright/mocks" @@ -54,10 +52,6 @@ func TestTakeScreenshotHandler_BasicFunctionality(t *testing.T) { args := map[string]any{} ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ - ID: "test-task-123", - }) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -74,6 +68,15 @@ func TestTakeScreenshotHandler_BasicFunctionality(t *testing.T) { t.Errorf("Expected success to be true, got: %v", response["success"]) } + resultPath, ok := response["path"].(string) + if !ok { + t.Errorf("Expected path in response, got: %v", response["path"]) + } + + if !strings.Contains(resultPath, "viewport_") { + t.Errorf("Expected viewport screenshot path, got: %s", resultPath) + } + resultFilename, ok := response["filename"].(string) if !ok { t.Errorf("Expected filename in response, got: %v", response["filename"]) @@ -98,8 +101,6 @@ func TestTakeScreenshotHandler_FullPageScreenshot(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -133,8 +134,6 @@ func TestTakeScreenshotHandler_JPEGWithQuality(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -171,8 +170,6 @@ func TestTakeScreenshotHandler_ElementSelector(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -203,8 +200,6 @@ func TestTakeScreenshotHandler_DeterministicPath(t *testing.T) { args := map[string]any{} ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -232,8 +227,6 @@ func TestTakeScreenshotHandler_InvalidImageType(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) _, err := skill.TakeScreenshotHandler(ctx, args) @@ -258,8 +251,6 @@ func TestTakeScreenshotHandler_InvalidQuality(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) _, err := skill.TakeScreenshotHandler(ctx, args) diff --git a/skills/write_to_csv.go b/skills/write_to_csv.go deleted file mode 100644 index bc6df67..0000000 --- a/skills/write_to_csv.go +++ /dev/null @@ -1,288 +0,0 @@ -package skills - -import ( - "bytes" - "context" - "encoding/csv" - "fmt" - "os" - "path/filepath" - "strconv" - - server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" - playwright "github.com/inference-gateway/browser-agent/internal/playwright" - zap "go.uber.org/zap" -) - -// WriteToCsvSkill struct holds the skill with services -type WriteToCsvSkill struct { - logger *zap.Logger - playwright playwright.BrowserAutomation -} - -// NewWriteToCsvSkill creates a new write_to_csv skill -func NewWriteToCsvSkill(logger *zap.Logger, playwright playwright.BrowserAutomation) server.Tool { - skill := &WriteToCsvSkill{ - logger: logger, - playwright: playwright, - } - return server.NewBasicTool( - "write_to_csv", - "Write structured data to CSV files with support for custom headers and file paths", - map[string]any{ - "type": "object", - "properties": map[string]any{ - "append": map[string]any{ - "default": false, - "description": "Whether to append to existing file or create new file", - "type": "boolean", - }, - "data": map[string]any{ - "description": "Array of objects to write to CSV, each object represents a row", - "items": map[string]any{"type": "object"}, - "type": "array", - }, - "filename": map[string]any{ - "description": "Name of the CSV file (without path, will be saved to configured data directory)", - "type": "string", - }, - "headers": map[string]any{ - "description": "Custom column headers for the CSV file (optional, will use object keys if not provided)", - "items": map[string]any{"type": "string"}, - "type": "array", - }, - "include_headers": map[string]any{ - "default": true, - "description": "Whether to include headers in the CSV output", - "type": "boolean", - }, - }, - "required": []string{"data", "filename"}, - }, - skill.WriteToCsvHandler, - ) -} - -// WriteToCsvHandler handles the write_to_csv skill execution -func (s *WriteToCsvSkill) WriteToCsvHandler(ctx context.Context, args map[string]any) (string, error) { - artifactHelper, ok := ctx.Value(server.ArtifactHelperContextKey).(*server.ArtifactHelper) - if !ok { - s.logger.Warn("unable to get artifact helper from context") - return "", fmt.Errorf("artifact helper not available in context") - } - - task, ok := ctx.Value(server.TaskContextKey).(*types.Task) - if !ok { - s.logger.Warn("unable to get task from context") - return "", fmt.Errorf("task not available in context") - } - - data, ok := args["data"].([]any) - if !ok || len(data) == 0 { - s.logger.Error("data parameter is required and must be a non-empty array") - return "", fmt.Errorf("data parameter is required and must be a non-empty array") - } - - filename, ok := args["filename"].(string) - if !ok || filename == "" { - s.logger.Error("filename parameter is required and must be a non-empty string") - return "", fmt.Errorf("filename parameter is required and must be a non-empty string") - } - - filePath := s.generateFilePath(filename) - - var customHeaders []string - if headers, ok := args["headers"].([]any); ok { - customHeaders = make([]string, len(headers)) - for i, header := range headers { - if headerStr, ok := header.(string); ok { - customHeaders[i] = headerStr - } else { - return "", fmt.Errorf("all headers must be strings") - } - } - } - - append := false - if appendVal, ok := args["append"].(bool); ok { - append = appendVal - } - - includeHeaders := true - if includeVal, ok := args["include_headers"].(bool); ok { - includeHeaders = includeVal - } - - s.logger.Info("writing data to CSV file", - zap.String("filename", filename), - zap.String("file_path", filePath), - zap.Int("rows_count", len(data)), - zap.Bool("append", append), - zap.Bool("include_headers", includeHeaders)) - - rows, err := s.convertDataToRows(data) - if err != nil { - s.logger.Error("failed to convert data to rows", zap.Error(err)) - return "", fmt.Errorf("failed to convert data to rows: %w", err) - } - - headers := customHeaders - if len(headers) == 0 && len(rows) > 0 { - headers = s.extractHeadersFromRows(rows) - } - - var csvBuffer bytes.Buffer - writer := csv.NewWriter(&csvBuffer) - - if includeHeaders && len(headers) > 0 { - if err := writer.Write(headers); err != nil { - return "", fmt.Errorf("failed to write headers: %w", err) - } - } - - rowsWritten := 0 - for _, row := range rows { - csvRow := make([]string, len(headers)) - for i, header := range headers { - if value, exists := row[header]; exists { - csvRow[i] = s.valueToString(value) - } else { - csvRow[i] = "" - } - } - - if err := writer.Write(csvRow); err != nil { - return "", fmt.Errorf("failed to write row: %w", err) - } - rowsWritten++ - } - - writer.Flush() - if err := writer.Error(); err != nil { - return "", fmt.Errorf("CSV writer error: %w", err) - } - - csvData := csvBuffer.Bytes() - - mimeType := "text/csv" - baseFilename := filepath.Base(filePath) - csvArtifact := artifactHelper.CreateFileArtifactFromBytes( - fmt.Sprintf("CSV File: %s", baseFilename), - fmt.Sprintf("CSV file with %d rows written to %s", rowsWritten, filePath), - baseFilename, - csvData, - &mimeType, - ) - - csvArtifact.Metadata = map[string]any{ - "rows_written": rowsWritten, - "headers": headers, - "include_headers": includeHeaders, - "append_mode": append, - "file_size": len(csvData), - "original_records": len(data), - } - - artifactHelper.AddArtifactToTask(task, csvArtifact) - s.logger.Info("CSV artifact added to task", - zap.String("taskID", task.ID), - zap.String("artifactID", csvArtifact.ArtifactID)) - - s.logger.Info("CSV data created successfully as artifact", - zap.String("filename", baseFilename), - zap.Int("rows_written", rowsWritten), - zap.String("artifactID", csvArtifact.ArtifactID)) - - result := fmt.Sprintf("Successfully created CSV with %d rows as artifact %s (%s)", rowsWritten, csvArtifact.ArtifactID, baseFilename) - return result, nil -} - -func (s *WriteToCsvSkill) generateFilePath(filename string) string { - var dataDir string - - if s.playwright != nil && s.playwright.GetConfig() != nil { - dataDir = s.playwright.GetConfig().Browser.DataDir - } - - if dataDir == "" { - dataDir = "." - } - - if err := os.MkdirAll(dataDir, 0755); err != nil { - s.logger.Warn("failed to create data files directory", zap.String("dir", dataDir), zap.Error(err)) - } - - if !filepath.IsAbs(filename) { - return filepath.Join(dataDir, filename) - } - return filename -} - -func (s *WriteToCsvSkill) convertDataToRows(data []any) ([]map[string]any, error) { - rows := make([]map[string]any, len(data)) - - for i, item := range data { - switch v := item.(type) { - case map[string]any: - rows[i] = v - case map[any]any: - converted := make(map[string]any) - for key, value := range v { - if keyStr, ok := key.(string); ok { - converted[keyStr] = value - } else { - converted[fmt.Sprintf("%v", key)] = value - } - } - rows[i] = converted - default: - return nil, fmt.Errorf("data item at index %d must be an object/map, got %T", i, item) - } - } - - return rows, nil -} - -func (s *WriteToCsvSkill) extractHeadersFromRows(rows []map[string]any) []string { - headerSet := make(map[string]bool) - var headers []string - - for _, row := range rows { - for key := range row { - if !headerSet[key] { - headerSet[key] = true - headers = append(headers, key) - } - } - } - - return headers -} - -func (s *WriteToCsvSkill) valueToString(value any) string { - if value == nil { - return "" - } - - switch v := value.(type) { - case string: - return v - case int: - return strconv.Itoa(v) - case int64: - return strconv.FormatInt(v, 10) - case float64: - return strconv.FormatFloat(v, 'f', -1, 64) - case bool: - return strconv.FormatBool(v) - case []any: - var items []string - for _, item := range v { - items = append(items, s.valueToString(item)) - } - return fmt.Sprintf("[%s]", fmt.Sprintf("%v", items)) - default: - return fmt.Sprintf("%v", v) - } -} diff --git a/skills/write_to_csv_test.go b/skills/write_to_csv_test.go deleted file mode 100644 index 3eb0384..0000000 --- a/skills/write_to_csv_test.go +++ /dev/null @@ -1,278 +0,0 @@ -package skills - -import ( - "context" - "strings" - "testing" - - server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" - config "github.com/inference-gateway/browser-agent/config" - mocks "github.com/inference-gateway/browser-agent/internal/playwright/mocks" - zap "go.uber.org/zap" -) - -func TestWriteToCsvHandler(t *testing.T) { - logger := zap.NewNop() - mockPlaywright := &mocks.FakeBrowserAutomation{} - mockPlaywright.GetConfigReturns(&config.Config{ - Browser: config.BrowserConfig{ - DataDir: "/tmp", - }, - }) - - skill := &WriteToCsvSkill{ - logger: logger, - playwright: mockPlaywright, - } - - tests := []struct { - name string - args map[string]any - expectedError bool - expectedRows int - validateOutput func(t *testing.T, result string) - }{ - { - name: "basic CSV writing", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30, "city": "New York"}, - map[string]any{"name": "Bob", "age": 25, "city": "San Francisco"}, - }, - "filename": "basic.csv", - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "basic.csv") { - t.Errorf("Expected result to mention basic.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "CSV with custom headers", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - "filename": "custom_headers.csv", - "headers": []any{"name", "age"}, - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "custom_headers.csv") { - t.Errorf("Expected result to mention custom_headers.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "CSV without headers", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - "filename": "no_headers.csv", - "include_headers": false, - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "no_headers.csv") { - t.Errorf("Expected result to mention no_headers.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "append to existing file", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Charlie", "age": 35}, - }, - "filename": "basic.csv", - "append": true, - }, - expectedError: false, - expectedRows: 1, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "1 rows") { - t.Errorf("Expected result to mention 1 rows, got: %s", result) - } - if !strings.Contains(result, "basic.csv") { - t.Errorf("Expected result to mention basic.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "invalid data type", - args: map[string]any{ - "data": "not an array", - "filename": "invalid.csv", - }, - expectedError: true, - }, - { - name: "empty file path", - args: map[string]any{ - "data": []any{map[string]any{"name": "Alice"}}, - "filename": "", - }, - expectedError: true, - }, - { - name: "empty data array", - args: map[string]any{ - "data": []any{}, - "filename": "empty.csv", - }, - expectedError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) - - result, err := skill.WriteToCsvHandler(ctx, tt.args) - - if tt.expectedError { - if err == nil { - t.Error("Expected an error but got none") - } - return - } - - if err != nil { - t.Errorf("Unexpected error: %v", err) - return - } - - if !strings.Contains(result, "Successfully created CSV") { - t.Errorf("Expected success message, got: %s", result) - } - - if tt.validateOutput != nil { - tt.validateOutput(t, result) - } - }) - } -} - -func TestConvertDataToRows(t *testing.T) { - logger := zap.NewNop() - skill := &WriteToCsvSkill{logger: logger} - - tests := []struct { - name string - input []any - expectedError bool - expectedLen int - }{ - { - name: "valid map[string]any data", - input: []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - expectedError: false, - expectedLen: 2, - }, - { - name: "mixed map types", - input: []any{ - map[string]any{"name": "Alice"}, - map[any]any{"name": "Bob", "age": 25}, - }, - expectedError: false, - expectedLen: 2, - }, - { - name: "invalid data type", - input: []any{ - "not a map", - map[string]any{"name": "Alice"}, - }, - expectedError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := skill.convertDataToRows(tt.input) - - if tt.expectedError { - if err == nil { - t.Error("Expected an error but got none") - } - return - } - - if err != nil { - t.Errorf("Unexpected error: %v", err) - return - } - - if len(result) != tt.expectedLen { - t.Errorf("Expected %d rows, got %d", tt.expectedLen, len(result)) - } - }) - } -} - -func TestValueToString(t *testing.T) { - logger := zap.NewNop() - skill := &WriteToCsvSkill{logger: logger} - - tests := []struct { - name string - input any - expected string - }{ - {"string", "hello", "hello"}, - {"int", 42, "42"}, - {"float", 3.14, "3.14"}, - {"bool true", true, "true"}, - {"bool false", false, "false"}, - {"nil", nil, ""}, - {"array", []any{"a", "b", "c"}, "[%!v([]string=[a b c])]"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := skill.valueToString(tt.input) - if tt.name != "array" && result != tt.expected { - t.Errorf("Expected %q, got %q", tt.expected, result) - } - - if tt.name == "array" && result == "" { - t.Error("Expected non-empty string for array") - } - }) - } -}