From e40376e022bda056d29ab29747e860b96315b1a4 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Tue, 14 Oct 2025 17:27:04 +0200 Subject: [PATCH 1/4] refactor: Remove write_to_csv skill and related tests; update agent capabilities for artifact creation Signed-off-by: Eden Reich --- README.md | 1 - agent.yaml | 56 ++----- go.mod | 2 +- go.sum | 4 +- main.go | 43 +++-- skills/take_screenshot.go | 75 ++------- skills/take_screenshot_test.go | 27 ++-- skills/write_to_csv.go | 288 --------------------------------- skills/write_to_csv_test.go | 278 ------------------------------- 9 files changed, 66 insertions(+), 708 deletions(-) delete mode 100644 skills/write_to_csv.go delete mode 100644 skills/write_to_csv_test.go diff --git a/README.md b/README.md index e6dc67c..a1047a8 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ docker run -p 8080:8080 browser-agent | `execute_script` | Execute custom JavaScript code in the browser context |args, return_value, script | | `handle_authentication` | Handle various authentication scenarios including basic auth, OAuth, and custom login forms |login_url, password, password_selector, submit_selector, type, username, username_selector | | `wait_for_condition` | Wait for specific conditions before proceeding with automation |condition, custom_function, selector, state, timeout | -| `write_to_csv` | Write structured data to CSV files with support for custom headers and file paths |append, data, filename, headers, include_headers | ## Configuration diff --git a/agent.yaml b/agent.yaml index afa3e56..03e4a6b 100644 --- a/agent.yaml +++ b/agent.yaml @@ -310,50 +310,12 @@ spec: inject: - logger - playwright - - id: write_to_csv - name: write_to_csv - description: Write structured data to CSV files with support for custom headers and file paths - tags: - - export - - csv - - data - - file - schema: - type: object - properties: - data: - type: array - items: - type: object - description: Array of objects to write to CSV, each object 
represents a row - filename: - type: string - description: Name of the CSV file (without path, will be saved to configured data directory) - headers: - type: array - items: - type: string - description: Custom column headers for the CSV file (optional, will use object keys if not provided) - append: - type: boolean - description: Whether to append to existing file or create new file - default: false - include_headers: - type: boolean - description: Whether to include headers in the CSV output - default: true - required: - - data - - filename - inject: - - logger - - playwright agent: provider: "" model: "" systemPrompt: | - You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. - + You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. + Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads 2. **Element Interaction**: Click buttons, fill forms, select dropdowns, and interact with any web element @@ -363,7 +325,8 @@ spec: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content - + 9. 
**Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports + Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) - Selector strategies (CSS, XPath, text, accessibility) @@ -375,7 +338,7 @@ spec: - File uploads and downloads - Network interception and modification - Mobile and responsive testing - + When helping users: - Always use robust selectors that won't break easily - Implement proper wait strategies for dynamic content @@ -384,7 +347,14 @@ spec: - Consider accessibility and best practices - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability - + + **IMPORTANT - Artifact Creation**: + When users request: + - Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact + - Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) + + After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. + Your automation solutions should be maintainable, efficient, and production-ready. 
services: playwright: diff --git a/go.mod b/go.mod index b4cf915..aae80cd 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/inference-gateway/browser-agent go 1.25 require ( - github.com/inference-gateway/adk v0.14.0 + github.com/inference-gateway/adk v0.15.0 github.com/playwright-community/playwright-go v0.5200.1 github.com/sethvargo/go-envconfig v1.3.0 github.com/stretchr/testify v1.10.0 diff --git a/go.sum b/go.sum index c0d31af..4c64438 100644 --- a/go.sum +++ b/go.sum @@ -65,8 +65,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/inference-gateway/adk v0.14.0 h1:CauhKjXGGNJU+ICqHk4K0ysftcHNuFJvQ+Z4lcUS7mI= -github.com/inference-gateway/adk v0.14.0/go.mod h1:Eh91HM5d3R0I5OOAh3YNUqZCJBBdGPHrKBALnVL8dl0= +github.com/inference-gateway/adk v0.15.0 h1:KFBrZEOHlcTQ2g2ZJCcGyq7y6eM2/iOwDWKH99BEJKM= +github.com/inference-gateway/adk v0.15.0/go.mod h1:Eh91HM5d3R0I5OOAh3YNUqZCJBBdGPHrKBALnVL8dl0= github.com/inference-gateway/sdk v1.10.0 h1:88m1XTS5J7Q9+sFaKXKHAPXdDpji6SASXVWz2pe8ZFk= github.com/inference-gateway/sdk v1.10.0/go.mod h1:3TTD7Kbr7FRt+9ZbCPAm3u0tXUIWx7flZuwrRgZgrdk= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= diff --git a/main.go b/main.go index 295a24e..58b1e1c 100644 --- a/main.go +++ b/main.go @@ -44,14 +44,15 @@ func main() { l.Info("starting " + AgentName + " agent (version: " + Version + ", environment: " + cfg.Environment + ")") - toolBox := server.NewDefaultToolBox() - // Initialize services playwrightSvc, err := playwright.NewPlaywrightService(l, &cfg) if err != nil { l.Fatal("failed to initialize playwright service", zap.Error(err)) } + // Create toolbox for browser automation skills + toolBox := 
server.NewToolBox() + // Register navigate_to_url skill navigateToURLSkill := skills.NewNavigateToURLSkill(l, playwrightSvc) toolBox.AddTool(navigateToURLSkill) @@ -92,11 +93,6 @@ func main() { toolBox.AddTool(waitForConditionSkill) l.Info("registered skill: wait_for_condition (Wait for specific conditions before proceeding with automation)") - // Register write_to_csv skill - writeToCsvSkill := skills.NewWriteToCsvSkill(l, playwrightSvc) - toolBox.AddTool(writeToCsvSkill) - l.Info("registered skill: write_to_csv (Write structured data to CSV files with support for custom headers and file paths)") - llmClient, err := server.NewOpenAICompatibleLLMClient(&cfg.A2A.AgentConfig, l) if err != nil { l.Fatal("failed to create LLM client", zap.Error(err)) @@ -105,9 +101,10 @@ func main() { agent, err := server.NewAgentBuilder(l). WithConfig(&cfg.A2A.AgentConfig). WithLLMClient(llmClient). + WithDefaultToolBox(). WithToolBox(toolBox). WithMaxChatCompletion(cfg.A2A.AgentConfig.MaxChatCompletionIterations). - WithSystemPrompt(`You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. + WithSystemPrompt(`You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads @@ -118,6 +115,7 @@ Your core capabilities include: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content +9. 
**Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) @@ -140,6 +138,13 @@ When helping users: - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability +**IMPORTANT - Artifact Creation**: +When users request: +- Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact +- Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) + +After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. + Your automation solutions should be maintainable, efficient, and production-ready. `). Build() @@ -147,24 +152,36 @@ Your automation solutions should be maintainable, efficient, and production-read l.Fatal("failed to create agent", zap.Error(err)) } + artifactService, err := server.NewArtifactService(&cfg.A2A.ArtifactsConfig, l) + if err != nil { + l.Warn("artifact service could not be created - check ARTIFACTS_ENABLE environment variable", zap.Error(err)) + l.Info("continuing without artifact service support") + artifactService = nil + } + artifactsServer, err := server. NewArtifactsServerBuilder(&cfg.A2A.ArtifactsConfig, l). Build() if err != nil { - l.Warn("artifacts server could not be created - check ARTIFACTS_ENABLE environment variable", zap.Error(err)) - l.Info("continuing without artifacts server support") + l.Warn("artifacts server could not be created", zap.Error(err)) + l.Info("continuing without artifacts server") artifactsServer = nil } - a2aServer, err := server.NewA2AServerBuilder(cfg.A2A, l). + serverBuilder := server.NewA2AServerBuilder(cfg.A2A, l). WithAgent(agent). 
WithAgentCardFromFile(".well-known/agent-card.json", map[string]any{ "name": AgentName, "version": Version, "description": AgentDescription, "url": cfg.A2A.AgentURL, - }). - WithArtifactStorage(artifactsServer.GetStorage()). + }) + + if artifactService != nil { + serverBuilder = serverBuilder.WithArtifactService(artifactService) + } + + a2aServer, err := serverBuilder. WithDefaultBackgroundTaskHandler(). WithDefaultStreamingTaskHandler(). Build() diff --git a/skills/take_screenshot.go b/skills/take_screenshot.go index e260114..51318d0 100644 --- a/skills/take_screenshot.go +++ b/skills/take_screenshot.go @@ -9,7 +9,6 @@ import ( "time" server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" playwright "github.com/inference-gateway/browser-agent/internal/playwright" zap "go.uber.org/zap" ) @@ -63,17 +62,6 @@ func NewTakeScreenshotSkill(logger *zap.Logger, playwright playwright.BrowserAut // TakeScreenshotHandler handles the take_screenshot skill execution func (s *TakeScreenshotSkill) TakeScreenshotHandler(ctx context.Context, args map[string]any) (string, error) { - artifactHelper, ok := ctx.Value(server.ArtifactHelperContextKey).(*server.ArtifactHelper) - if !ok { - s.logger.Warn("unable to get artifact helper from context") - return "", fmt.Errorf("artifact helper not available in context") - } - - task, ok := ctx.Value(server.TaskContextKey).(*types.Task) - if !ok { - s.logger.Warn("unable to get task from context") - return "", fmt.Errorf("task not available in context") - } generatedPath, err := s.generateDeterministicPath(args) if err != nil { @@ -132,62 +120,21 @@ func (s *TakeScreenshotSkill) TakeScreenshotHandler(ctx context.Context, args ma return "", fmt.Errorf("screenshot failed: %w", err) } - screenshotData, err := os.ReadFile(generatedPath) - if err != nil { - s.logger.Error("failed to read screenshot file", zap.String("path", generatedPath), zap.Error(err)) - return "", fmt.Errorf("failed to read screenshot 
file: %w", err) - } - - metadata, err := s.getScreenshotMetadata(generatedPath, fullPage, selector, imageType, quality) - if err != nil { - s.logger.Warn("failed to get screenshot metadata", zap.Error(err)) - } - - mimeType := s.getMimeType(imageType) - filename := filepath.Base(generatedPath) - - screenshotArtifact := artifactHelper.CreateFileArtifactFromBytes( - fmt.Sprintf("Screenshot: %s", filename), - fmt.Sprintf("Screenshot captured from browser session %s", session.ID), - filename, - screenshotData, - &mimeType, - ) - - if metadata != nil { - screenshotArtifact.Metadata = metadata - } - - artifactHelper.AddArtifactToTask(task, screenshotArtifact) - s.logger.Info("artifact added to task", - zap.String("taskID", task.ID), - zap.String("artifactID", screenshotArtifact.ArtifactID)) - - if err := os.Remove(generatedPath); err != nil { - s.logger.Warn("failed to clean up temporary screenshot file", - zap.String("path", generatedPath), - zap.Error(err)) - } else { - s.logger.Debug("cleaned up temporary screenshot file", zap.String("path", generatedPath)) - } - s.logger.Info("screenshot completed successfully", zap.String("sessionID", session.ID), - zap.String("artifactID", screenshotArtifact.ArtifactID), - zap.Int("fileSize", len(screenshotData))) + zap.String("path", generatedPath)) response := map[string]any{ - "success": true, - "filename": filename, - "full_page": fullPage, - "type": imageType, - "quality": quality, - "selector": selector, - "session_id": session.ID, - "artifact_id": screenshotArtifact.ArtifactID, - "file_size": len(screenshotData), - "timestamp": s.getCurrentTimestamp(), - "message": "Screenshot captured successfully and stored as artifact", + "success": true, + "path": generatedPath, + "filename": filepath.Base(generatedPath), + "full_page": fullPage, + "type": imageType, + "quality": quality, + "selector": selector, + "session_id": session.ID, + "timestamp": s.getCurrentTimestamp(), + "message": fmt.Sprintf("Screenshot captured successfully 
and saved to %s", generatedPath), } responseJSON, err := json.Marshal(response) diff --git a/skills/take_screenshot_test.go b/skills/take_screenshot_test.go index a90d23b..940039d 100644 --- a/skills/take_screenshot_test.go +++ b/skills/take_screenshot_test.go @@ -9,8 +9,6 @@ import ( "testing" "time" - server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" config "github.com/inference-gateway/browser-agent/config" playwright "github.com/inference-gateway/browser-agent/internal/playwright" mocks "github.com/inference-gateway/browser-agent/internal/playwright/mocks" @@ -54,10 +52,6 @@ func TestTakeScreenshotHandler_BasicFunctionality(t *testing.T) { args := map[string]any{} ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ - ID: "test-task-123", - }) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -74,6 +68,15 @@ func TestTakeScreenshotHandler_BasicFunctionality(t *testing.T) { t.Errorf("Expected success to be true, got: %v", response["success"]) } + resultPath, ok := response["path"].(string) + if !ok { + t.Errorf("Expected path in response, got: %v", response["path"]) + } + + if !strings.Contains(resultPath, "viewport_") { + t.Errorf("Expected viewport screenshot path, got: %s", resultPath) + } + resultFilename, ok := response["filename"].(string) if !ok { t.Errorf("Expected filename in response, got: %v", response["filename"]) @@ -98,8 +101,6 @@ func TestTakeScreenshotHandler_FullPageScreenshot(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -133,8 +134,6 @@ func TestTakeScreenshotHandler_JPEGWithQuality(t *testing.T) { } ctx := 
context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -171,8 +170,6 @@ func TestTakeScreenshotHandler_ElementSelector(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -203,8 +200,6 @@ func TestTakeScreenshotHandler_DeterministicPath(t *testing.T) { args := map[string]any{} ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) result, err := skill.TakeScreenshotHandler(ctx, args) @@ -232,8 +227,6 @@ func TestTakeScreenshotHandler_InvalidImageType(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) _, err := skill.TakeScreenshotHandler(ctx, args) @@ -258,8 +251,6 @@ func TestTakeScreenshotHandler_InvalidQuality(t *testing.T) { } ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) _, err := skill.TakeScreenshotHandler(ctx, args) diff --git a/skills/write_to_csv.go b/skills/write_to_csv.go deleted file mode 100644 index bc6df67..0000000 --- a/skills/write_to_csv.go +++ /dev/null @@ -1,288 +0,0 @@ -package skills - -import ( - "bytes" - "context" - "encoding/csv" - "fmt" - "os" - "path/filepath" - "strconv" - - server "github.com/inference-gateway/adk/server" - 
types "github.com/inference-gateway/adk/types" - playwright "github.com/inference-gateway/browser-agent/internal/playwright" - zap "go.uber.org/zap" -) - -// WriteToCsvSkill struct holds the skill with services -type WriteToCsvSkill struct { - logger *zap.Logger - playwright playwright.BrowserAutomation -} - -// NewWriteToCsvSkill creates a new write_to_csv skill -func NewWriteToCsvSkill(logger *zap.Logger, playwright playwright.BrowserAutomation) server.Tool { - skill := &WriteToCsvSkill{ - logger: logger, - playwright: playwright, - } - return server.NewBasicTool( - "write_to_csv", - "Write structured data to CSV files with support for custom headers and file paths", - map[string]any{ - "type": "object", - "properties": map[string]any{ - "append": map[string]any{ - "default": false, - "description": "Whether to append to existing file or create new file", - "type": "boolean", - }, - "data": map[string]any{ - "description": "Array of objects to write to CSV, each object represents a row", - "items": map[string]any{"type": "object"}, - "type": "array", - }, - "filename": map[string]any{ - "description": "Name of the CSV file (without path, will be saved to configured data directory)", - "type": "string", - }, - "headers": map[string]any{ - "description": "Custom column headers for the CSV file (optional, will use object keys if not provided)", - "items": map[string]any{"type": "string"}, - "type": "array", - }, - "include_headers": map[string]any{ - "default": true, - "description": "Whether to include headers in the CSV output", - "type": "boolean", - }, - }, - "required": []string{"data", "filename"}, - }, - skill.WriteToCsvHandler, - ) -} - -// WriteToCsvHandler handles the write_to_csv skill execution -func (s *WriteToCsvSkill) WriteToCsvHandler(ctx context.Context, args map[string]any) (string, error) { - artifactHelper, ok := ctx.Value(server.ArtifactHelperContextKey).(*server.ArtifactHelper) - if !ok { - s.logger.Warn("unable to get artifact helper from 
context") - return "", fmt.Errorf("artifact helper not available in context") - } - - task, ok := ctx.Value(server.TaskContextKey).(*types.Task) - if !ok { - s.logger.Warn("unable to get task from context") - return "", fmt.Errorf("task not available in context") - } - - data, ok := args["data"].([]any) - if !ok || len(data) == 0 { - s.logger.Error("data parameter is required and must be a non-empty array") - return "", fmt.Errorf("data parameter is required and must be a non-empty array") - } - - filename, ok := args["filename"].(string) - if !ok || filename == "" { - s.logger.Error("filename parameter is required and must be a non-empty string") - return "", fmt.Errorf("filename parameter is required and must be a non-empty string") - } - - filePath := s.generateFilePath(filename) - - var customHeaders []string - if headers, ok := args["headers"].([]any); ok { - customHeaders = make([]string, len(headers)) - for i, header := range headers { - if headerStr, ok := header.(string); ok { - customHeaders[i] = headerStr - } else { - return "", fmt.Errorf("all headers must be strings") - } - } - } - - append := false - if appendVal, ok := args["append"].(bool); ok { - append = appendVal - } - - includeHeaders := true - if includeVal, ok := args["include_headers"].(bool); ok { - includeHeaders = includeVal - } - - s.logger.Info("writing data to CSV file", - zap.String("filename", filename), - zap.String("file_path", filePath), - zap.Int("rows_count", len(data)), - zap.Bool("append", append), - zap.Bool("include_headers", includeHeaders)) - - rows, err := s.convertDataToRows(data) - if err != nil { - s.logger.Error("failed to convert data to rows", zap.Error(err)) - return "", fmt.Errorf("failed to convert data to rows: %w", err) - } - - headers := customHeaders - if len(headers) == 0 && len(rows) > 0 { - headers = s.extractHeadersFromRows(rows) - } - - var csvBuffer bytes.Buffer - writer := csv.NewWriter(&csvBuffer) - - if includeHeaders && len(headers) > 0 { - if err := 
writer.Write(headers); err != nil { - return "", fmt.Errorf("failed to write headers: %w", err) - } - } - - rowsWritten := 0 - for _, row := range rows { - csvRow := make([]string, len(headers)) - for i, header := range headers { - if value, exists := row[header]; exists { - csvRow[i] = s.valueToString(value) - } else { - csvRow[i] = "" - } - } - - if err := writer.Write(csvRow); err != nil { - return "", fmt.Errorf("failed to write row: %w", err) - } - rowsWritten++ - } - - writer.Flush() - if err := writer.Error(); err != nil { - return "", fmt.Errorf("CSV writer error: %w", err) - } - - csvData := csvBuffer.Bytes() - - mimeType := "text/csv" - baseFilename := filepath.Base(filePath) - csvArtifact := artifactHelper.CreateFileArtifactFromBytes( - fmt.Sprintf("CSV File: %s", baseFilename), - fmt.Sprintf("CSV file with %d rows written to %s", rowsWritten, filePath), - baseFilename, - csvData, - &mimeType, - ) - - csvArtifact.Metadata = map[string]any{ - "rows_written": rowsWritten, - "headers": headers, - "include_headers": includeHeaders, - "append_mode": append, - "file_size": len(csvData), - "original_records": len(data), - } - - artifactHelper.AddArtifactToTask(task, csvArtifact) - s.logger.Info("CSV artifact added to task", - zap.String("taskID", task.ID), - zap.String("artifactID", csvArtifact.ArtifactID)) - - s.logger.Info("CSV data created successfully as artifact", - zap.String("filename", baseFilename), - zap.Int("rows_written", rowsWritten), - zap.String("artifactID", csvArtifact.ArtifactID)) - - result := fmt.Sprintf("Successfully created CSV with %d rows as artifact %s (%s)", rowsWritten, csvArtifact.ArtifactID, baseFilename) - return result, nil -} - -func (s *WriteToCsvSkill) generateFilePath(filename string) string { - var dataDir string - - if s.playwright != nil && s.playwright.GetConfig() != nil { - dataDir = s.playwright.GetConfig().Browser.DataDir - } - - if dataDir == "" { - dataDir = "." 
- } - - if err := os.MkdirAll(dataDir, 0755); err != nil { - s.logger.Warn("failed to create data files directory", zap.String("dir", dataDir), zap.Error(err)) - } - - if !filepath.IsAbs(filename) { - return filepath.Join(dataDir, filename) - } - return filename -} - -func (s *WriteToCsvSkill) convertDataToRows(data []any) ([]map[string]any, error) { - rows := make([]map[string]any, len(data)) - - for i, item := range data { - switch v := item.(type) { - case map[string]any: - rows[i] = v - case map[any]any: - converted := make(map[string]any) - for key, value := range v { - if keyStr, ok := key.(string); ok { - converted[keyStr] = value - } else { - converted[fmt.Sprintf("%v", key)] = value - } - } - rows[i] = converted - default: - return nil, fmt.Errorf("data item at index %d must be an object/map, got %T", i, item) - } - } - - return rows, nil -} - -func (s *WriteToCsvSkill) extractHeadersFromRows(rows []map[string]any) []string { - headerSet := make(map[string]bool) - var headers []string - - for _, row := range rows { - for key := range row { - if !headerSet[key] { - headerSet[key] = true - headers = append(headers, key) - } - } - } - - return headers -} - -func (s *WriteToCsvSkill) valueToString(value any) string { - if value == nil { - return "" - } - - switch v := value.(type) { - case string: - return v - case int: - return strconv.Itoa(v) - case int64: - return strconv.FormatInt(v, 10) - case float64: - return strconv.FormatFloat(v, 'f', -1, 64) - case bool: - return strconv.FormatBool(v) - case []any: - var items []string - for _, item := range v { - items = append(items, s.valueToString(item)) - } - return fmt.Sprintf("[%s]", fmt.Sprintf("%v", items)) - default: - return fmt.Sprintf("%v", v) - } -} diff --git a/skills/write_to_csv_test.go b/skills/write_to_csv_test.go deleted file mode 100644 index 3eb0384..0000000 --- a/skills/write_to_csv_test.go +++ /dev/null @@ -1,278 +0,0 @@ -package skills - -import ( - "context" - "strings" - "testing" - - 
server "github.com/inference-gateway/adk/server" - types "github.com/inference-gateway/adk/types" - config "github.com/inference-gateway/browser-agent/config" - mocks "github.com/inference-gateway/browser-agent/internal/playwright/mocks" - zap "go.uber.org/zap" -) - -func TestWriteToCsvHandler(t *testing.T) { - logger := zap.NewNop() - mockPlaywright := &mocks.FakeBrowserAutomation{} - mockPlaywright.GetConfigReturns(&config.Config{ - Browser: config.BrowserConfig{ - DataDir: "/tmp", - }, - }) - - skill := &WriteToCsvSkill{ - logger: logger, - playwright: mockPlaywright, - } - - tests := []struct { - name string - args map[string]any - expectedError bool - expectedRows int - validateOutput func(t *testing.T, result string) - }{ - { - name: "basic CSV writing", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30, "city": "New York"}, - map[string]any{"name": "Bob", "age": 25, "city": "San Francisco"}, - }, - "filename": "basic.csv", - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "basic.csv") { - t.Errorf("Expected result to mention basic.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "CSV with custom headers", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - "filename": "custom_headers.csv", - "headers": []any{"name", "age"}, - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "custom_headers.csv") { - t.Errorf("Expected result to mention 
custom_headers.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "CSV without headers", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - "filename": "no_headers.csv", - "include_headers": false, - }, - expectedError: false, - expectedRows: 2, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "2 rows") { - t.Errorf("Expected result to mention 2 rows, got: %s", result) - } - if !strings.Contains(result, "no_headers.csv") { - t.Errorf("Expected result to mention no_headers.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "append to existing file", - args: map[string]any{ - "data": []any{ - map[string]any{"name": "Charlie", "age": 35}, - }, - "filename": "basic.csv", - "append": true, - }, - expectedError: false, - expectedRows: 1, - validateOutput: func(t *testing.T, result string) { - if !strings.Contains(result, "1 rows") { - t.Errorf("Expected result to mention 1 rows, got: %s", result) - } - if !strings.Contains(result, "basic.csv") { - t.Errorf("Expected result to mention basic.csv, got: %s", result) - } - if !strings.Contains(result, "artifact") { - t.Errorf("Expected result to mention artifact, got: %s", result) - } - }, - }, - { - name: "invalid data type", - args: map[string]any{ - "data": "not an array", - "filename": "invalid.csv", - }, - expectedError: true, - }, - { - name: "empty file path", - args: map[string]any{ - "data": []any{map[string]any{"name": "Alice"}}, - "filename": "", - }, - expectedError: true, - }, - { - name: "empty data array", - args: map[string]any{ - "data": []any{}, - "filename": "empty.csv", - }, - expectedError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t 
*testing.T) { - ctx := context.Background() - ctx = context.WithValue(ctx, server.ArtifactHelperContextKey, server.NewArtifactHelper()) - ctx = context.WithValue(ctx, server.TaskContextKey, &types.Task{ID: "test-task-123"}) - - result, err := skill.WriteToCsvHandler(ctx, tt.args) - - if tt.expectedError { - if err == nil { - t.Error("Expected an error but got none") - } - return - } - - if err != nil { - t.Errorf("Unexpected error: %v", err) - return - } - - if !strings.Contains(result, "Successfully created CSV") { - t.Errorf("Expected success message, got: %s", result) - } - - if tt.validateOutput != nil { - tt.validateOutput(t, result) - } - }) - } -} - -func TestConvertDataToRows(t *testing.T) { - logger := zap.NewNop() - skill := &WriteToCsvSkill{logger: logger} - - tests := []struct { - name string - input []any - expectedError bool - expectedLen int - }{ - { - name: "valid map[string]any data", - input: []any{ - map[string]any{"name": "Alice", "age": 30}, - map[string]any{"name": "Bob", "age": 25}, - }, - expectedError: false, - expectedLen: 2, - }, - { - name: "mixed map types", - input: []any{ - map[string]any{"name": "Alice"}, - map[any]any{"name": "Bob", "age": 25}, - }, - expectedError: false, - expectedLen: 2, - }, - { - name: "invalid data type", - input: []any{ - "not a map", - map[string]any{"name": "Alice"}, - }, - expectedError: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := skill.convertDataToRows(tt.input) - - if tt.expectedError { - if err == nil { - t.Error("Expected an error but got none") - } - return - } - - if err != nil { - t.Errorf("Unexpected error: %v", err) - return - } - - if len(result) != tt.expectedLen { - t.Errorf("Expected %d rows, got %d", tt.expectedLen, len(result)) - } - }) - } -} - -func TestValueToString(t *testing.T) { - logger := zap.NewNop() - skill := &WriteToCsvSkill{logger: logger} - - tests := []struct { - name string - input any - expected string - }{ - 
{"string", "hello", "hello"}, - {"int", 42, "42"}, - {"float", 3.14, "3.14"}, - {"bool true", true, "true"}, - {"bool false", false, "false"}, - {"nil", nil, ""}, - {"array", []any{"a", "b", "c"}, "[%!v([]string=[a b c])]"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := skill.valueToString(tt.input) - if tt.name != "array" && result != tt.expected { - t.Errorf("Expected %q, got %q", tt.expected, result) - } - - if tt.name == "array" && result == "" { - t.Error("Expected non-empty string for array") - } - }) - } -} From 622bc0eb3a5641ff50cbe1649a8ff40bca611215 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Tue, 14 Oct 2025 17:48:12 +0200 Subject: [PATCH 2/4] chore: Update ADL CLI version references from 0.23.2 to 0.23.4 across multiple files Signed-off-by: Eden Reich --- .github/workflows/cd.yml | 2 +- .github/workflows/ci.yml | 2 +- .releaserc.yaml | 2 +- .well-known/agent-card.json | 7 ------- AGENTS.md | 26 ++++++++++---------------- CLAUDE.md | 5 ++--- Taskfile.yml | 2 +- config/config.go | 2 +- internal/logger/logger.go | 2 +- main.go | 4 ++-- 10 files changed, 20 insertions(+), 34 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index ee05206..23c8d71 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.4. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a10b2c8..f6811de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.4. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. 
# Manual changes to this file may be overwritten during regeneration. diff --git a/.releaserc.yaml b/.releaserc.yaml index 69f80d5..98de905 100644 --- a/.releaserc.yaml +++ b/.releaserc.yaml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.4. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.well-known/agent-card.json b/.well-known/agent-card.json index 029e4d0..211fad1 100644 --- a/.well-known/agent-card.json +++ b/.well-known/agent-card.json @@ -68,13 +68,6 @@ "description": "Wait for specific conditions before proceeding with automation", "tags": ["wait","synchronization","timing","playwright"], "schema": {"properties":{"condition":{"description":"Type of condition (selector, navigation, function, timeout, networkidle)","type":"string"},"custom_function":{"description":"Custom JavaScript function to evaluate for 'function' condition","type":"string"},"selector":{"description":"Selector to wait for if condition is 'selector'","type":"string"},"state":{"default":"visible","description":"State to wait for (visible, hidden, attached, detached)","type":"string"},"timeout":{"default":30000,"description":"Maximum time to wait in milliseconds","type":"integer"}},"required":["condition"],"type":"object"} - }, - { - "id": "write_to_csv", - "name": "write_to_csv", - "description": "Write structured data to CSV files with support for custom headers and file paths", - "tags": ["export","csv","data","file"], - "schema": {"properties":{"append":{"default":false,"description":"Whether to append to existing file or create new file","type":"boolean"},"data":{"description":"Array of objects to write to CSV, each object represents a row","items":{"type":"object"},"type":"array"},"filename":{"description":"Name of the CSV file (without path, will be saved to configured data 
directory)","type":"string"},"headers":{"description":"Custom column headers for the CSV file (optional, will use object keys if not provided)","items":{"type":"string"},"type":"array"},"include_headers":{"default":true,"description":"Whether to include headers in the CSV output","type":"boolean"}},"required":["data","filename"],"type":"object"} } ] } diff --git a/AGENTS.md b/AGENTS.md index cddf862..eea0b8e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,7 +30,7 @@ This agent is built using the Agent Definition Language (ADL) and provides A2A c -**System Prompt**: You are an expert Playwright browser automation assistant. Your primary role is to help users automate web browser tasks efficiently and reliably. +**System Prompt**: You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. Your core capabilities include: 1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads @@ -41,6 +41,7 @@ Your core capabilities include: 6. **JavaScript Execution**: Run custom scripts in the browser context 7. **Authentication Handling**: Manage various authentication methods 8. **Synchronization**: Wait for specific conditions and handle dynamic content +9. 
**Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports Key expertise areas: - Modern web technologies (SPA, dynamic content, AJAX) @@ -63,6 +64,13 @@ When helping users: - Provide clear explanations of automation steps - Optimize for speed while maintaining reliability +**IMPORTANT - Artifact Creation**: +When users request: +- Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact +- Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) + +After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. + Your automation solutions should be maintainable, efficient, and production-ready. @@ -75,7 +83,7 @@ Your automation solutions should be maintainable, efficient, and production-read ## Skills -This agent provides 9 skills: +This agent provides 8 skills: ### navigate_to_url @@ -134,13 +142,6 @@ This agent provides 9 skills: - **Output Schema**: Defined in agent configuration -### write_to_csv -- **Description**: Write structured data to CSV files with support for custom headers and file paths -- **Tags**: export, csv, data, file -- **Input Schema**: Defined in agent configuration -- **Output Schema**: Defined in agent configuration - - ## Server Configuration @@ -246,11 +247,6 @@ curl -X POST http://localhost:8080/skills/wait_for_condition \ -H "Content-Type: application/json" \ -d '{"input": "your_input_here"}' -# Execute write_to_csv skill -curl -X POST http://localhost:8080/skills/write_to_csv \ - -H "Content-Type: application/json" \ - -d '{"input": "your_input_here"}' - ``` @@ -298,8 +294,6 @@ docker run -p 8080:8080 browser-agent │ └── wait_for_condition.go # Wait for specific conditions before proceeding with 
automation -│ └── write_to_csv.go # Write structured data to CSV files with support for custom headers and file paths - ├── .well-known/ # Agent configuration │ └── agent-card.json # Agent metadata ├── go.mod # Go module definition diff --git a/CLAUDE.md b/CLAUDE.md index 5726ee4..c20fbcb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ browser-agent is an A2A (Agent-to-Agent) server implementing the [A2A Protocol]( ### ADL-Generated Structure -The codebase is generated using ADL CLI 0.23.2 and follows a strict generation pattern: +The codebase is generated using ADL CLI 0.23.4 and follows a strict generation pattern: - **Generated Files**: Marked with `DO NOT EDIT` headers - manual changes will be overwritten - **Configuration Source**: `agent.yaml` - defines agent capabilities, skills, and metadata - **Server Implementation**: Built on the ADK (Agent Development Kit) framework from `github.com/inference-gateway/adk` @@ -82,7 +82,6 @@ The following skills are currently defined: - **execute_script**: Execute custom JavaScript code in the browser context - **handle_authentication**: Handle various authentication scenarios including basic auth, OAuth, and custom login forms - **wait_for_condition**: Wait for specific conditions before proceeding with automation -- **write_to_csv**: Write structured data to CSV files with support for custom headers and file paths To modify skills: 1. 
Update `agent.yaml` with skill definitions @@ -118,7 +117,7 @@ Activate with: `flox activate` (if Flox is installed) - **Generated Files**: Never manually edit files with "DO NOT EDIT" headers - **Configuration Changes**: Always modify `agent.yaml` and regenerate -- **ADL Version**: Ensure ADL CLI 0.23.2 or compatible version for regeneration +- **ADL Version**: Ensure ADL CLI 0.23.4 or compatible version for regeneration - **Port Configuration**: Default 8080, configurable via `A2A_PORT` or `A2A_SERVER_PORT` ## Debugging Tips diff --git a/Taskfile.yml b/Taskfile.yml index 7406c65..59df46e 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.2. DO NOT EDIT. +# Code generated by ADL CLI v0.23.4. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/config/config.go b/config/config.go index 76c3da0..44ae5ac 100644 --- a/config/config.go +++ b/config/config.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.4. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/internal/logger/logger.go b/internal/logger/logger.go index 669a094..1e08e0d 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.4. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/main.go b/main.go index 58b1e1c..1fa2e62 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.2. DO NOT EDIT. +// Code generated by ADL CLI v0.23.4. DO NOT EDIT. 
// This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. @@ -50,7 +50,7 @@ func main() { l.Fatal("failed to initialize playwright service", zap.Error(err)) } - // Create toolbox for browser automation skills + // Create toolbox for skills toolBox := server.NewToolBox() // Register navigate_to_url skill From 7c7150ec82139534265477efa40f1388aa236f36 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Tue, 14 Oct 2025 18:55:37 +0200 Subject: [PATCH 3/4] chore: Update ADL CLI version references from 0.23.4 to 0.23.6 in multiple files; add A2A_AGENT_CLIENT_TOOLS_CREATE_ARTIFACT configuration Signed-off-by: Eden Reich --- .github/workflows/cd.yml | 2 +- .github/workflows/ci.yml | 2 +- .releaserc.yaml | 2 +- CLAUDE.md | 4 ++-- Taskfile.yml | 2 +- config/config.go | 2 +- example/docker-compose.yaml | 1 + internal/logger/logger.go | 2 +- main.go | 7 +++---- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 23c8d71..99fd276 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.4. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6811de..79ff8ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.4. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. 
diff --git a/.releaserc.yaml b/.releaserc.yaml index 98de905..2ba3cd2 100644 --- a/.releaserc.yaml +++ b/.releaserc.yaml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.4. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. diff --git a/CLAUDE.md b/CLAUDE.md index c20fbcb..1d7f10f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ browser-agent is an A2A (Agent-to-Agent) server implementing the [A2A Protocol]( ### ADL-Generated Structure -The codebase is generated using ADL CLI 0.23.4 and follows a strict generation pattern: +The codebase is generated using ADL CLI 0.23.6 and follows a strict generation pattern: - **Generated Files**: Marked with `DO NOT EDIT` headers - manual changes will be overwritten - **Configuration Source**: `agent.yaml` - defines agent capabilities, skills, and metadata - **Server Implementation**: Built on the ADK (Agent Development Kit) framework from `github.com/inference-gateway/adk` @@ -117,7 +117,7 @@ Activate with: `flox activate` (if Flox is installed) - **Generated Files**: Never manually edit files with "DO NOT EDIT" headers - **Configuration Changes**: Always modify `agent.yaml` and regenerate -- **ADL Version**: Ensure ADL CLI 0.23.4 or compatible version for regeneration +- **ADL Version**: Ensure ADL CLI 0.23.6 or compatible version for regeneration - **Port Configuration**: Default 8080, configurable via `A2A_PORT` or `A2A_SERVER_PORT` ## Debugging Tips diff --git a/Taskfile.yml b/Taskfile.yml index 59df46e..582a0fa 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -1,4 +1,4 @@ -# Code generated by ADL CLI v0.23.4. DO NOT EDIT. +# Code generated by ADL CLI v0.23.6. DO NOT EDIT. # This file was automatically generated from an ADL (Agent Definition Language) specification. # Manual changes to this file may be overwritten during regeneration. 
diff --git a/config/config.go b/config/config.go index 44ae5ac..ef6cb77 100644 --- a/config/config.go +++ b/config/config.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.4. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml index 5c4e1bc..6fe0526 100644 --- a/example/docker-compose.yaml +++ b/example/docker-compose.yaml @@ -53,6 +53,7 @@ services: A2A_AGENT_CLIENT_MAX_CHAT_COMPLETION_ITERATIONS: 20 A2A_AGENT_CLIENT_MAX_TOKENS: 4096 A2A_AGENT_CLIENT_TEMPERATURE: 0.7 + A2A_AGENT_CLIENT_TOOLS_CREATE_ARTIFACT: true A2A_CAPABILITIES_STREAMING: true A2A_CAPABILITIES_PUSH_NOTIFICATIONS: false A2A_CAPABILITIES_STATE_TRANSITION_HISTORY: false diff --git a/internal/logger/logger.go b/internal/logger/logger.go index 1e08e0d..5a913ff 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.4. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. diff --git a/main.go b/main.go index 1fa2e62..a756856 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Code generated by ADL CLI v0.23.4. DO NOT EDIT. +// Code generated by ADL CLI v0.23.6. DO NOT EDIT. // This file was automatically generated from an ADL (Agent Definition Language) specification. // Manual changes to this file may be overwritten during regeneration. 
@@ -50,8 +50,8 @@ func main() { l.Fatal("failed to initialize playwright service", zap.Error(err)) } - // Create toolbox for skills - toolBox := server.NewToolBox() + // Create toolbox with default tools (like input_required, create_artifact etc) + toolBox := server.NewDefaultToolBox(&cfg.A2A.AgentConfig.ToolBoxConfig) // Register navigate_to_url skill navigateToURLSkill := skills.NewNavigateToURLSkill(l, playwrightSvc) @@ -101,7 +101,6 @@ func main() { agent, err := server.NewAgentBuilder(l). WithConfig(&cfg.A2A.AgentConfig). WithLLMClient(llmClient). - WithDefaultToolBox(). WithToolBox(toolBox). WithMaxChatCompletion(cfg.A2A.AgentConfig.MaxChatCompletionIterations). WithSystemPrompt(`You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably. From 9191b2d48a0985122430e18435f5b06590d187e6 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Tue, 14 Oct 2025 19:22:54 +0200 Subject: [PATCH 4/4] refactor: Update artifact creation process in screenshot handling - use a programmatic approach for this one Since the artifacts in this case are already created as screenshots by Playwright, there is no need to send them over to the LLM and back to another tool in order to create them; this would be a waste of tokens.
Signed-off-by: Eden Reich --- AGENTS.md | 6 +-- agent.yaml | 6 +-- main.go | 6 +-- skills/take_screenshot.go | 110 ++++++++++++++++++++++++++------------ 4 files changed, 83 insertions(+), 45 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index eea0b8e..31b9c8f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -65,11 +65,9 @@ When helping users: - Optimize for speed while maintaining reliability **IMPORTANT - Artifact Creation**: -When users request: -- Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact -- Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) +When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. -After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. +For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). Your automation solutions should be maintainable, efficient, and production-ready. diff --git a/agent.yaml b/agent.yaml index 03e4a6b..91f02f3 100644 --- a/agent.yaml +++ b/agent.yaml @@ -349,11 +349,9 @@ spec: - Optimize for speed while maintaining reliability **IMPORTANT - Artifact Creation**: - When users request: - - Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact - - Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) + When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. 
- After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. + For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). Your automation solutions should be maintainable, efficient, and production-ready. services: diff --git a/main.go b/main.go index a756856..77badbf 100644 --- a/main.go +++ b/main.go @@ -138,11 +138,9 @@ When helping users: - Optimize for speed while maintaining reliability **IMPORTANT - Artifact Creation**: -When users request: -- Screenshots → Use take_screenshot tool, then use create_artifact to save the screenshot file as a downloadable artifact -- Data extraction → Use extract_data tool, then use create_artifact to save the extracted data as a downloadable file (JSON/CSV/TXT) +When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response. -After capturing screenshots or extracting data, ALWAYS use the create_artifact tool to make the files downloadable for the user. Read the file from the path returned by the tool and create an artifact with appropriate MIME type. +For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT). Your automation solutions should be maintainable, efficient, and production-ready. `). 
diff --git a/skills/take_screenshot.go b/skills/take_screenshot.go index 51318d0..4ea9e6e 100644 --- a/skills/take_screenshot.go +++ b/skills/take_screenshot.go @@ -9,6 +9,7 @@ import ( "time" server "github.com/inference-gateway/adk/server" + types "github.com/inference-gateway/adk/types" playwright "github.com/inference-gateway/browser-agent/internal/playwright" zap "go.uber.org/zap" ) @@ -124,17 +125,45 @@ func (s *TakeScreenshotSkill) TakeScreenshotHandler(ctx context.Context, args ma zap.String("sessionID", session.ID), zap.String("path", generatedPath)) + artifactURL, artifactID, err := s.createArtifactFromScreenshot(ctx, generatedPath, imageType) + if err != nil { + s.logger.Debug("artifact creation skipped or failed, returning file path only", + zap.Error(err), + zap.String("path", generatedPath)) + + response := map[string]any{ + "success": true, + "path": generatedPath, + "filename": filepath.Base(generatedPath), + "full_page": fullPage, + "type": imageType, + "quality": quality, + "selector": selector, + "session_id": session.ID, + "timestamp": s.getCurrentTimestamp(), + "message": fmt.Sprintf("Screenshot captured successfully and saved to %s", generatedPath), + } + + responseJSON, err := json.Marshal(response) + if err != nil { + return "", fmt.Errorf("failed to marshal response: %w", err) + } + return string(responseJSON), nil + } + response := map[string]any{ - "success": true, - "path": generatedPath, - "filename": filepath.Base(generatedPath), - "full_page": fullPage, - "type": imageType, - "quality": quality, - "selector": selector, - "session_id": session.ID, - "timestamp": s.getCurrentTimestamp(), - "message": fmt.Sprintf("Screenshot captured successfully and saved to %s", generatedPath), + "success": true, + "path": generatedPath, + "filename": filepath.Base(generatedPath), + "full_page": fullPage, + "type": imageType, + "quality": quality, + "selector": selector, + "session_id": session.ID, + "timestamp": s.getCurrentTimestamp(), + 
"artifact_id": artifactID, + "url": artifactURL, + "message": fmt.Sprintf("Screenshot captured successfully. Download URL: %s", artifactURL), } responseJSON, err := json.Marshal(response) @@ -203,36 +232,51 @@ func (s *TakeScreenshotSkill) getOrCreateSession(ctx context.Context) (*playwrig return s.playwright.GetOrCreateDefaultSession(ctx) } -// getScreenshotMetadata extracts metadata about the screenshot file -func (s *TakeScreenshotSkill) getScreenshotMetadata(path string, fullPage bool, selector, imageType string, quality int) (map[string]any, error) { - fileInfo, err := os.Stat(path) - if err != nil { - return nil, fmt.Errorf("failed to get file info: %w", err) +// getCurrentTimestamp returns the current timestamp in RFC3339 format +func (s *TakeScreenshotSkill) getCurrentTimestamp() string { + return time.Now().Format(time.RFC3339) +} + +// createArtifactFromScreenshot creates an artifact from the screenshot file +func (s *TakeScreenshotSkill) createArtifactFromScreenshot(ctx context.Context, filePath, imageType string) (url string, artifactID string, err error) { + task, ok := ctx.Value(server.TaskContextKey).(*types.Task) + if !ok { + return "", "", fmt.Errorf("task not found in context") } - metadata := map[string]any{ - "file_size": fileInfo.Size(), - "created_at": fileInfo.ModTime().Format(time.RFC3339), - "permissions": fileInfo.Mode().String(), - "full_page": fullPage, - "image_type": imageType, - "quality": quality, - "capture_type": "viewport", + artifactService, ok := ctx.Value(server.ArtifactServiceContextKey).(server.ArtifactService) + if !ok || artifactService == nil { + return "", "", fmt.Errorf("artifact service not available") } - if fullPage { - metadata["capture_type"] = "full_page" + data, err := os.ReadFile(filePath) + if err != nil { + return "", "", fmt.Errorf("failed to read screenshot file: %w", err) } - if selector != "" { - metadata["capture_type"] = "element" - metadata["selector"] = selector + mimeType := s.getMimeType(imageType) + 
+ filename := filepath.Base(filePath) + artifact, err := artifactService.CreateFileArtifact( + fmt.Sprintf("Screenshot - %s", filename), + fmt.Sprintf("Screenshot captured at %s", s.getCurrentTimestamp()), + filename, + data, + &mimeType, + ) + if err != nil { + return "", "", fmt.Errorf("failed to create artifact: %w", err) } - return metadata, nil -} + artifactService.AddArtifactToTask(task, artifact) -// getCurrentTimestamp returns the current timestamp in RFC3339 format -func (s *TakeScreenshotSkill) getCurrentTimestamp() string { - return time.Now().Format(time.RFC3339) + if len(artifact.Parts) > 0 { + if filePart, ok := artifact.Parts[0].(types.FilePart); ok { + if fileWithURI, ok := filePart.File.(types.FileWithUri); ok { + return fileWithURI.URI, artifact.ArtifactID, nil + } + } + } + + return "", artifact.ArtifactID, nil }