diff --git a/.github/workflows/ci-doctor.lock.yml b/.github/workflows/ci-doctor.lock.yml index d0af43b9898..1ed0168c4f2 100644 --- a/.github/workflows/ci-doctor.lock.yml +++ b/.github/workflows/ci-doctor.lock.yml @@ -40,7 +40,7 @@ jobs: uses: actions/checkout@v5 # Cache configuration from frontmatter processed below - name: Cache (investigation-memory-${{ github.repository }}) - uses: actions/cache@v3 + uses: actions/cache@v4 with: key: investigation-memory-${{ github.repository }} path: | diff --git a/.github/workflows/dev.lock.yml b/.github/workflows/dev.lock.yml index c034fe16059..be5db7dd8b0 100644 --- a/.github/workflows/dev.lock.yml +++ b/.github/workflows/dev.lock.yml @@ -120,6 +120,17 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v5 + # Cache memory MCP configuration from frontmatter processed below + - name: Create cache-memory directory + run: mkdir -p /tmp/cache-memory + - name: Cache memory MCP data + uses: actions/cache@v4 + with: + key: memory-${{ github.workflow }}-${{ github.run_id }} + path: /tmp/cache-memory + restore-keys: | + memory-${{ github.workflow }}- + memory- - name: Generate Claude Settings run: | mkdir -p /tmp/.claude @@ -256,6 +267,15 @@ jobs: cat > /tmp/mcp-config/mcp-servers.json << 'EOF' { "mcpServers": { + "memory": { + "command": "npx", + "args": [ + "@modelcontextprotocol/server-memory" + ], + "env": { + "MEMORY_FILE_PATH": "/tmp/cache-memory/memory.json" + } + }, "github": { "command": "docker", "args": [ @@ -280,7 +300,16 @@ jobs: run: | mkdir -p /tmp/aw-prompts cat > $GITHUB_AW_PROMPT << 'EOF' + Before starting, read the entire memory graph and print it to the output as "My past poems..." + + Then: + Write a short poem. 
+ - check if this poem is already in memory + - if already in memory, generate a new poem + + Before returning the poem: + - store generated poem in memory @@ -427,7 +456,8 @@ jobs: # - mcp__github__search_pull_requests # - mcp__github__search_repositories # - mcp__github__search_users - allowed_tools: "ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,Write,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_issue,mcp__github__get_issue_comments,mcp__github__get_job_logs,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issues,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_secret_scanning_alerts,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users" + # - mcp__memory + allowed_tools: 
"ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,Write,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_issue,mcp__github__get_issue_comments,mcp__github__get_job_logs,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issues,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_secret_scanning_alerts,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users,mcp__memory" anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} claude_env: | GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} diff --git a/.github/workflows/dev.md b/.github/workflows/dev.md index 9587c75e41e..880fc5cf859 100644 --- a/.github/workflows/dev.md +++ b/.github/workflows/dev.md @@ -11,12 +11,23 @@ safe-outputs: engine: id: claude max-turns: 5 +tools: + cache-memory: true permissions: read-all concurrency: group: "gh-aw-${{ github.workflow }}-${{ github.ref }}" --- +Before starting, read the entire 
memory graph and print it to the output as "My past poems..." + +Then: + Write a short poem. +- check if this poem is already in memory +- if already in memory, generate a new poem + +Before returning the poem: +- store generated poem in memory \ No newline at end of file diff --git a/docs/src/content/docs/reference/cache-memory.md b/docs/src/content/docs/reference/cache-memory.md new file mode 100644 index 00000000000..85bb97516ed --- /dev/null +++ b/docs/src/content/docs/reference/cache-memory.md @@ -0,0 +1,433 @@ +--- +title: Cache Memory - Persistent Memory for Agentic Workflows +description: Complete guide to using cache-memory for persistent memory across workflow runs using GitHub Actions cache and MCP memory servers. +--- + +# Cache Memory - Persistent Memory for Agentic Workflows + +The `cache-memory` feature enables agentic workflows to maintain persistent memory across workflow runs by integrating the Model Context Protocol (MCP) memory server with GitHub Actions cache. + +## Overview + +Cache Memory provides: + +- **Persistent Memory**: AI agents can remember information across multiple workflow runs +- **GitHub Actions Integration**: Built on top of GitHub Actions cache infrastructure +- **MCP Memory Server**: Uses the official Model Context Protocol memory server via npx +- **Automatic Configuration**: Seamlessly integrates with Claude and Custom engines +- **Smart Caching**: Intelligent cache key generation and restoration strategies + +## How It Works + +When `cache-memory` is enabled, the workflow compiler automatically: + +1. **Mounts Memory MCP Server**: Configures an official MCP memory server via npx +2. **Creates Cache Steps**: Adds GitHub Actions cache steps to restore and save memory data +3. **Persistent Storage**: Maps `/tmp/cache-memory` to store memory data files +4. **Cache Key Management**: Generates intelligent cache keys with progressive fallback +5. 
**Tool Integration**: Adds "memory" tool to the MCP configuration for AI engines + +## Basic Usage + +### Simple Enable + +Enable cache-memory with default settings: + +```yaml +--- +engine: claude +tools: + cache-memory: true + github: + allowed: [get_repository] +--- +``` + +This uses: +- **Default cache key**: `memory-${{ github.workflow }}-${{ github.run_id }}` +- **Default setup**: Uses `npx @modelcontextprotocol/server-memory` +- **Default storage path**: `/tmp/cache-memory` for memory data files + +### Advanced Configuration + +Customize cache key and artifact retention: + +```yaml +--- +engine: claude +tools: + cache-memory: + key: custom-memory-${{ github.workflow }}-${{ github.run_id }} + retention-days: 30 + github: + allowed: [get_repository] +--- +``` + +### Legacy Docker Configuration (Deprecated) + +**Note**: Docker image configuration is deprecated. The cache-memory tool now uses npx for improved compatibility and simplified setup. + +For legacy configurations, the docker-image field is ignored: + +```yaml +--- +engine: claude +tools: + cache-memory: + # docker-image field is deprecated and ignored + docker-image: "ghcr.io/modelcontextprotocol/server-memory:v1.0.0" + retention-days: 7 + github: + allowed: [get_repository] +--- +``` + +### Artifact Retention + +Configure how long memory data artifacts are retained: + +```yaml +--- +engine: claude +tools: + cache-memory: + key: persistent-memory + retention-days: 90 # Keep artifacts for 90 days (1-90 range) + github: + allowed: [get_repository] +--- +``` + +The `retention-days` option controls the `actions/upload-artifact` retention period: +- **Range**: 1-90 days +- **Default**: Repository setting (if not specified) +- **Purpose**: Provides alternative access to memory data beyond cache expiration + +## Cache Behavior and GitHub Actions Integration + +### Cache Key Strategy + +Cache Memory builds on GitHub Actions cache infrastructure with these behaviors: + +#### Automatic Key Generation + +- 
**Default Pattern**: `memory-${{ github.workflow }}-${{ github.run_id }}` +- **Custom Keys**: Any custom key gets `-${{ github.run_id }}` appended automatically +- **Example**: `project-memory` becomes `project-memory-${{ github.run_id }}` + +#### Progressive Restore Keys + +Restore keys are automatically generated by splitting the cache key on dashes, creating a fallback hierarchy: + +For key `custom-memory-project-v1-${{ github.run_id }}`, restore keys are: +``` +custom-memory-project-v1- +custom-memory-project- +custom-memory- +custom- +``` + +This ensures the most specific match is found first, with progressive fallbacks. + +### GitHub Actions Cache Integration + +Cache Memory leverages GitHub Actions cache with these characteristics: + +#### Cache Retention +- **Retention Period**: 7 days (GitHub Actions standard) +- **Size Limit**: 10GB per repository (GitHub Actions standard) +- **LRU Eviction**: Least recently used caches are evicted when limits are reached + +#### Artifact Upload (Optional) +When `retention-days` is configured, memory data is also uploaded as artifacts: +- **Retention Period**: 1-90 days (configurable via `retention-days`) +- **Purpose**: Alternative access to memory data beyond cache expiration +- **Use Case**: Long-term memory persistence for workflows that run infrequently + +#### Cache Scoping +- **Branch Scoping**: Caches are accessible across branches in the same repository +- **Workflow Scoping**: Each workflow maintains its own cache namespace by default +- **Run Scoping**: Each run gets unique cache keys to prevent conflicts + +### Update Behavior + +#### First Run +- **No Cache Hit**: Memory starts empty +- **Storage**: New memories are stored in `/tmp/cache-memory` +- **Cache Save**: Memory data is cached at workflow completion + +#### Subsequent Runs +- **Cache Restore**: Previous memory data is restored from cache +- **Memory Continuity**: AI agent can access previously stored information +- **Incremental Updates**: New 
memories are added to existing data +- **Cache Update**: Updated memory data is saved with new cache key + +#### Cache Expiration +- **Automatic Expiration**: Caches expire after 7 days of inactivity +- **Manual Cleanup**: Repository administrators can clear caches manually +- **Version Management**: Different cache keys allow for memory versioning + +## Generated Workflow Steps + +When cache-memory is enabled, these steps are automatically added to your workflow: + +### Basic Configuration (Cache Only) + +```yaml +# Cache memory MCP configuration from frontmatter processed below +- name: Create cache-memory directory + run: mkdir -p /tmp/cache-memory + +- name: Cache memory MCP data + uses: actions/cache@v4 + with: + key: memory-${{ github.workflow }}-${{ github.run_id }} + path: /tmp/cache-memory + restore-keys: | + memory-${{ github.workflow }}- + memory- +``` + +### With Artifact Upload (retention-days configured) + +```yaml +# Cache memory MCP configuration from frontmatter processed below +- name: Create cache-memory directory + run: mkdir -p /tmp/cache-memory + +- name: Cache memory MCP data + uses: actions/cache@v4 + with: + key: memory-${{ github.workflow }}-${{ github.run_id }} + path: /tmp/cache-memory + restore-keys: | + memory-${{ github.workflow }}- + memory- + +- name: Upload memory MCP data as artifact + uses: actions/upload-artifact@v4 + with: + name: cache-memory-data + path: /tmp/cache-memory + retention-days: 30 +``` + +## MCP Server Configuration + +The memory server is configured using npx following official MCP documentation: + +```json +"memory": { + "command": "npx", + "args": [ + "@modelcontextprotocol/server-memory" + ], + "env": { + "MEMORY_FILE_PATH": "/tmp/cache-memory/memory.json" + } +} +``` + +### Benefits of npx Setup + +#### Simplified Installation +- **No Docker Required**: Runs directly using npx package manager +- **Automatic Updates**: Gets latest package versions automatically +- **Faster Startup**: No container overhead or 
image pulling +- **Universal Compatibility**: Works on any runner with Node.js/npm + +#### Package Management +- **Official Package**: Uses `@modelcontextprotocol/server-memory` from npm +- **Version Control**: Inherits npm's version resolution and dependency management +- **Security**: Leverages npm's security scanning and vulnerability detection + +## Memory Operations + +### Storing Information + +The AI agent can store information using the memory tool: + +``` +Remember that the user prefers verbose error messages when debugging. +``` + +### Retrieving Information + +The AI agent can query its memory: + +``` +What do I know about the user's debugging preferences? +``` + +### Memory Categories + +The MCP memory server organizes information into categories: + +- **Basic Identity**: User identification and context +- **Behaviors**: Observed patterns and preferences +- **Preferences**: Explicit user preferences +- **Goals**: Stated objectives and tasks +- **Relationships**: Connections between entities + +## Best Practices + +### Cache Key Naming + +Use descriptive, hierarchical cache keys: + +```yaml +tools: + cache-memory: + key: project-${{ github.repository_owner }}-${{ github.workflow }} +``` + +### Memory Scope + +Consider the scope of memory needed: + +- **Workflow-specific**: Default behavior, memory per workflow +- **Repository-wide**: Use repository name in cache key +- **User-specific**: Include user information in cache key + +### Resource Management + +Be mindful of cache usage: + +- **Memory Size**: Monitor memory data growth over time +- **Cache Limits**: Respect GitHub's 10GB repository cache limit +- **Cleanup Strategy**: Consider periodic cache clearing for long-running projects + +### Node.js and npm Requirements + +Ensure runner compatibility: + +- **Node.js**: GitHub runners include Node.js by default +- **npm/npx**: Available in all GitHub-hosted runners +- **Package Access**: Ensure access to npm registry for package installation + +## 
Troubleshooting + +### Common Issues + +#### Memory Not Persisting +- **Check Cache Keys**: Ensure keys are consistent between runs +- **Verify Paths**: Confirm `/tmp/cache-memory` directory exists +- **Review Logs**: Check workflow logs for cache restore/save messages + +#### Package Installation Issues +- **npm Registry Access**: Verify runner can access npm registry +- **Package Availability**: Confirm `@modelcontextprotocol/server-memory` package exists +- **Network Access**: Ensure runner has internet connectivity for package installation + +#### Cache Size Issues +- **Monitor Usage**: Track cache size growth over time +- **Cleanup Strategy**: Implement periodic cache clearing +- **Key Rotation**: Use time-based cache keys for automatic expiration + +### Debugging + +Enable verbose logging to debug cache-memory issues: + +```yaml +--- +engine: claude +tools: + cache-memory: true +timeout_minutes: 10 # Allow time for debugging +--- + +# Debug Cache Memory + +Please debug the cache-memory functionality by: + +1. Checking what's currently in memory +2. Storing a test message +3. Retrieving the stored message +4. 
Reporting on memory persistence +``` + +## Security Considerations + +### Data Privacy + +- **Sensitive Data**: Avoid storing sensitive information in memory +- **Access Control**: Memory data follows repository access permissions +- **Audit Trail**: Cache access is logged in workflow execution logs + +### Package Security + +- **Official Package**: Use only the official `@modelcontextprotocol/server-memory` package +- **Dependency Scanning**: npm automatically scans for vulnerabilities +- **Audit Trail**: Package installation is logged in workflow execution logs + +## Examples + +### Basic Memory Usage + +```yaml +--- +engine: claude +on: + workflow_dispatch: + inputs: + remember: + description: 'Information to remember' + required: true + +tools: + cache-memory: true + github: + allowed: [get_repository] +--- + +# Memory Test Workflow + +Store and retrieve information across workflow runs. + +## Task + +1. Check what you remember from previous runs +2. Store the new information: "${{ inputs.remember }}" +3. List all stored memories +4. Provide a summary of memory persistence +``` + +### Project-Specific Memory + +```yaml +--- +engine: claude +tools: + cache-memory: + key: project-docs-${{ github.repository }}-${{ github.workflow }} + github: + allowed: [get_repository, list_files] +--- + +# Documentation Assistant + +Use project-specific memory to maintain context about documentation updates. +``` + +### Multi-Workflow Memory Sharing + +```yaml +--- +engine: claude +tools: + cache-memory: + key: shared-memory-${{ github.repository }} +--- + +# Shared Memory Workflow + +Share memory data across multiple workflows in the same repository. 
+``` + +## Related Documentation + +- [Frontmatter Options](frontmatter.md) - Complete frontmatter configuration guide +- [MCP Tools](mcps.md) - Model Context Protocol integration +- [Safe Outputs](safe-outputs.md) - Output processing and automation +- [GitHub Actions Cache Documentation](https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows) - Official GitHub cache documentation \ No newline at end of file diff --git a/docs/src/content/docs/reference/frontmatter.md b/docs/src/content/docs/reference/frontmatter.md index a5211bee432..af1db8a0d9b 100644 --- a/docs/src/content/docs/reference/frontmatter.md +++ b/docs/src/content/docs/reference/frontmatter.md @@ -25,6 +25,7 @@ The YAML frontmatter supports standard GitHub Actions properties plus additional - `network`: Network access control for AI engines - `tools`: Available tools and MCP servers for the AI engine - `cache`: Cache configuration for workflow dependencies +- `cache-memory`: [Persistent memory configuration](cache-memory.md) using GitHub Actions cache and MCP memory servers - `safe-outputs`: [Safe Output Processing](safe-outputs.md) for automatic issue creation and comment posting. 
## Trigger Events (`on:`) diff --git a/docs/src/content/docs/reference/index.md b/docs/src/content/docs/reference/index.md index 28f686cb06c..af6330a0d3f 100644 --- a/docs/src/content/docs/reference/index.md +++ b/docs/src/content/docs/reference/index.md @@ -14,6 +14,7 @@ Write agentic workflows in natural language markdown, and run them in GitHub Act - **[Workflow Structure](workflow-structure.md)** - Directory layout, file organization, and expression security - **[Frontmatter Options](frontmatter.md)** - All configuration options for workflows +- **[Cache Memory](cache-memory.md)** - Persistent memory across workflow runs using GitHub Actions cache - **[Safe Output Processing](safe-outputs.md)** - Automatic issue, comment, and PR creation from agentic workflow output - **[Include Directives](include-directives.md)** - Modularizing workflows with includes - **[Alias Triggers](alias-triggers.md)** - Special @mention triggers and context text diff --git a/pkg/cli/templates/instructions.md b/pkg/cli/templates/instructions.md index e42521e19ef..3a8aba1d4b5 100644 --- a/pkg/cli/templates/instructions.md +++ b/pkg/cli/templates/instructions.md @@ -216,6 +216,7 @@ The YAML frontmatter supports these fields: - **`alias:`** - Alternative workflow name (string) - **`cache:`** - Cache configuration for workflow dependencies (object or array) +- **`cache-memory:`** - Memory MCP server with persistent cache storage (boolean or object) ### Cache Configuration @@ -256,6 +257,37 @@ cache: Cache steps are automatically added to the workflow job and the cache configuration is removed from the final `.lock.yml` file. 
+### Cache Memory Configuration + +The `cache-memory:` field enables persistent memory storage for agentic workflows using the @modelcontextprotocol/server-memory MCP server: + +**Simple Enable:** +```yaml +cache-memory: true +``` + +**Advanced Configuration:** +```yaml +cache-memory: + key: custom-memory-${{ github.run_id }} +``` + +**How It Works:** +- Mounts a memory MCP server at `/tmp/cache-memory/` that persists across workflow runs +- Uses `actions/cache` with restore keys so the most recent matching cache is restored on each run +- Automatically adds the memory MCP server to available tools +- Cache steps are automatically added to the workflow job +- Restore keys are automatically generated by splitting the cache key on '-' + +**Supported Parameters:** +- `key:` - Custom cache key (defaults to `memory-${{ github.workflow }}-${{ github.run_id }}`) + +**Restore Key Generation:** +The system automatically generates restore keys by progressively splitting the cache key on '-': +- Key: `custom-memory-project-v1-123` → Restore keys: `custom-memory-project-v1-`, `custom-memory-project-`, `custom-memory-`, `custom-` + +The memory MCP server is automatically configured when `cache-memory` is enabled and works with both Claude and Custom engines. + ## Output Processing and Issue Creation ### Automatic GitHub Issue Creation diff --git a/pkg/cli/workflows/test-claude-cache-memory.lock.yml b/pkg/cli/workflows/test-claude-cache-memory.lock.yml new file mode 100644 index 00000000000..1dd12e23ef5 --- /dev/null +++ b/pkg/cli/workflows/test-claude-cache-memory.lock.yml @@ -0,0 +1,865 @@ +# This file was automatically generated by gh-aw. DO NOT EDIT.
+# To update this file, edit the corresponding .md file and run: +# gh aw compile + +name: "Test Claude with Cache Memory and Custom Docker Image" +on: + workflow_dispatch: + inputs: + task: + default: Store this information for later + description: Task to remember + required: true + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Test Claude with Cache Memory and Custom Docker Image" + +jobs: + test-claude-with-cache-memory-and-custom-docker-image: + runs-on: ubuntu-latest + permissions: read-all + steps: + - name: Checkout repository + uses: actions/checkout@v5 + # Cache memory MCP configuration from frontmatter processed below + - name: Create cache-memory directory + run: mkdir -p /tmp/cache-memory + - name: Cache memory MCP data + uses: actions/cache@v4 + with: + key: memory-${{ github.workflow }}-${{ github.run_id }} + path: /tmp/cache-memory + restore-keys: | + memory-${{ github.workflow }}- + memory- + - name: Upload memory MCP data as artifact + uses: actions/upload-artifact@v4 + with: + name: cache-memory-data + path: /tmp/cache-memory + retention-days: 14 + - name: Generate Claude Settings + run: | + mkdir -p /tmp/.claude + cat > /tmp/.claude/settings.json << 'EOF' + { + "hooks": { + "PreToolUse": [ + { + "matcher": "WebFetch|WebSearch", + "hooks": [ + { + "type": "command", + "command": ".claude/hooks/network_permissions.py" + } + ] + } + ] + } + } + EOF + - name: Generate Network Permissions Hook + run: | + mkdir -p .claude/hooks + cat > .claude/hooks/network_permissions.py << 'EOF' + #!/usr/bin/env python3 + """ + Network permissions validator for Claude Code engine. + Generated by gh-aw from engine network permissions configuration. 
+ """ + + import json + import sys + import urllib.parse + import re + + # Domain allow-list (populated during generation) + ALLOWED_DOMAINS = ["crl3.digicert.com","crl4.digicert.com","ocsp.digicert.com","ts-crl.ws.symantec.com","ts-ocsp.ws.symantec.com","crl.geotrust.com","ocsp.geotrust.com","crl.thawte.com","ocsp.thawte.com","crl.verisign.com","ocsp.verisign.com","crl.globalsign.com","ocsp.globalsign.com","crls.ssl.com","ocsp.ssl.com","crl.identrust.com","ocsp.identrust.com","crl.sectigo.com","ocsp.sectigo.com","crl.usertrust.com","ocsp.usertrust.com","s.symcb.com","s.symcd.com","json-schema.org","json.schemastore.org","archive.ubuntu.com","security.ubuntu.com","ppa.launchpad.net","keyserver.ubuntu.com","azure.archive.ubuntu.com","api.snapcraft.io","packagecloud.io","packages.cloud.google.com","packages.microsoft.com"] + + def extract_domain(url_or_query): + """Extract domain from URL or search query.""" + if not url_or_query: + return None + + if url_or_query.startswith(('http://', 'https://')): + return urllib.parse.urlparse(url_or_query).netloc.lower() + + # Check for domain patterns in search queries + match = re.search(r'site:([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', url_or_query) + if match: + return match.group(1).lower() + + return None + + def is_domain_allowed(domain): + """Check if domain is allowed.""" + if not domain: + # If no domain detected, allow only if not under deny-all policy + return bool(ALLOWED_DOMAINS) # False if empty list (deny-all), True if has domains + + # Empty allowed domains means deny all + if not ALLOWED_DOMAINS: + return False + + for pattern in ALLOWED_DOMAINS: + regex = pattern.replace('.', r'\.').replace('*', '.*') + if re.match(f'^{regex}$', domain): + return True + return False + + # Main logic + try: + data = json.load(sys.stdin) + tool_name = data.get('tool_name', '') + tool_input = data.get('tool_input', {}) + + if tool_name not in ['WebFetch', 'WebSearch']: + sys.exit(0) # Allow other tools + + target = tool_input.get('url') or 
tool_input.get('query', '') + domain = extract_domain(target) + + # For WebSearch, apply domain restrictions consistently + # If no domain detected in search query, check if restrictions are in place + if tool_name == 'WebSearch' and not domain: + # Since this hook is only generated when network permissions are configured, + # empty ALLOWED_DOMAINS means deny-all policy + if not ALLOWED_DOMAINS: # Empty list means deny all + print(f"Network access blocked: deny-all policy in effect", file=sys.stderr) + print(f"No domains are allowed for WebSearch", file=sys.stderr) + sys.exit(2) # Block under deny-all policy + else: + print(f"Network access blocked for web-search: no specific domain detected", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block general searches when domain allowlist is configured + + if not is_domain_allowed(domain): + print(f"Network access blocked for domain: {domain}", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block with feedback to Claude + + sys.exit(0) # Allow + + except Exception as e: + print(f"Network validation error: {e}", file=sys.stderr) + sys.exit(2) # Block on errors + + EOF + chmod +x .claude/hooks/network_permissions.py + - name: Setup MCPs + run: | + mkdir -p /tmp/mcp-config + cat > /tmp/mcp-config/mcp-servers.json << 'EOF' + { + "mcpServers": { + "memory": { + "command": "npx", + "args": [ + "@modelcontextprotocol/server-memory" + ], + "env": { + "MEMORY_FILE_PATH": "/tmp/cache-memory/memory.json" + } + }, + "github": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "GITHUB_PERSONAL_ACCESS_TOKEN", + "ghcr.io/github/github-mcp-server:sha-09deac4" + ], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${{ secrets.GITHUB_TOKEN }}" + } + } + } + } + EOF + - name: Create prompt + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + run: | + mkdir -p /tmp/aw-prompts + cat > 
$GITHUB_AW_PROMPT << 'EOF' + # Test Claude with Cache Memory and Custom Docker Image + + You are a test agent that demonstrates the cache-memory functionality with Claude engine using a custom Docker image. + + ## Task + + Your job is to: + + 1. **Store a test task** in your memory using the memory MCP server + 2. **Retrieve any previous tasks** that you've stored in memory + 3. **Report on the memory contents** including both current and historical tasks + 4. **Use GitHub tools** to get basic repository information + + ## Instructions + + 1. First, use the memory tool to see what you already know from previous runs + 2. Store a new test task: "Test task for run ${{ github.run_number }}" in your memory + 3. List all tasks you now have in memory + 4. Get basic information about this repository using the GitHub tool + 5. Provide a summary of: + - What you remembered from before + - What you just stored + - Basic repository information + + ## Expected Behavior + + - **First run**: Should show empty memory, then store the new task + - **Subsequent runs**: Should show previously stored tasks, then add the new one + - **Memory persistence**: Tasks should persist across workflow runs thanks to cache-memory + - **Custom Docker image**: Uses ghcr.io/modelcontextprotocol/server-memory:v1.0.0 instead of latest + - **Artifact upload**: Memory data is also uploaded as artifact with 14-day retention + + This workflow tests that the cache-memory configuration properly: + - Mounts the memory MCP server with custom Docker image + - Persists data between runs using GitHub Actions cache + - Uploads memory data as artifacts with configurable retention + - Works with Claude engine and MCP tools + - Integrates with other tools like GitHub + + EOF + - name: Print prompt to step summary + run: | + echo "## Generated Prompt" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '``````markdown' >> $GITHUB_STEP_SUMMARY + cat $GITHUB_AW_PROMPT >> $GITHUB_STEP_SUMMARY + echo 
'``````' >> $GITHUB_STEP_SUMMARY + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + - name: Generate agentic run info + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "claude", + engine_name: "Claude Code", + model: "", + version: "", + workflow_name: "Test Claude with Cache Memory and Custom Docker Image", + experimental: false, + supports_tools_whitelist: true, + supports_http_transport: true, + run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: false, + created_at: new Date().toISOString() + }; + + // Write to /tmp directory to avoid inclusion in PR + const tmpPath = '/tmp/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + - name: Upload agentic run info + if: always() + uses: actions/upload-artifact@v4 + with: + name: aw_info.json + path: /tmp/aw_info.json + if-no-files-found: warn + - name: Execute Claude Code Action + id: agentic_execution + uses: anthropics/claude-code-base-action@v0.0.56 + with: + # Allowed tools (sorted): + # - ExitPlanMode + # - Glob + # - Grep + # - LS + # - NotebookRead + # - Read + # - Task + # - TodoWrite + # - mcp__github__download_workflow_run_artifact + # - mcp__github__get_code_scanning_alert + # - mcp__github__get_commit + # - mcp__github__get_dependabot_alert + # - mcp__github__get_discussion + # - mcp__github__get_discussion_comments + # - mcp__github__get_file_contents + # - mcp__github__get_issue + # - mcp__github__get_issue_comments + # - mcp__github__get_job_logs + # - mcp__github__get_me + # - mcp__github__get_notification_details + # - mcp__github__get_pull_request + # - mcp__github__get_pull_request_comments 
+ # - mcp__github__get_pull_request_diff + # - mcp__github__get_pull_request_files + # - mcp__github__get_pull_request_reviews + # - mcp__github__get_pull_request_status + # - mcp__github__get_repository + # - mcp__github__get_secret_scanning_alert + # - mcp__github__get_tag + # - mcp__github__get_workflow_run + # - mcp__github__get_workflow_run_logs + # - mcp__github__get_workflow_run_usage + # - mcp__github__list_branches + # - mcp__github__list_code_scanning_alerts + # - mcp__github__list_commits + # - mcp__github__list_dependabot_alerts + # - mcp__github__list_discussion_categories + # - mcp__github__list_discussions + # - mcp__github__list_issues + # - mcp__github__list_notifications + # - mcp__github__list_pull_requests + # - mcp__github__list_secret_scanning_alerts + # - mcp__github__list_tags + # - mcp__github__list_workflow_jobs + # - mcp__github__list_workflow_run_artifacts + # - mcp__github__list_workflow_runs + # - mcp__github__list_workflows + # - mcp__github__search_code + # - mcp__github__search_issues + # - mcp__github__search_orgs + # - mcp__github__search_pull_requests + # - mcp__github__search_repositories + # - mcp__github__search_users + # - mcp__memory + allowed_tools: 
"ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_issue,mcp__github__get_issue_comments,mcp__github__get_job_logs,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_repository,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issues,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_secret_scanning_alerts,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users,mcp__memory" + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + mcp_config: /tmp/mcp-config/mcp-servers.json + prompt_file: /tmp/aw-prompts/prompt.txt + settings: /tmp/.claude/settings.json + timeout_minutes: 5 + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + - name: Capture Agentic Action logs + if: always() + run: | + # Copy the detailed execution file from Agentic Action if available + if [ -n "${{ steps.agentic_execution.outputs.execution_file }}" ] && [ -f "${{ 
steps.agentic_execution.outputs.execution_file }}" ]; then + cp ${{ steps.agentic_execution.outputs.execution_file }} /tmp/test-claude-with-cache-memory-and-custom-docker-image.log + else + echo "No execution file output found from Agentic Action" >> /tmp/test-claude-with-cache-memory-and-custom-docker-image.log + fi + + # Ensure log file exists + touch /tmp/test-claude-with-cache-memory-and-custom-docker-image.log + - name: Upload engine output files + uses: actions/upload-artifact@v4 + with: + name: agent_outputs + path: | + output.txt + if-no-files-found: ignore + - name: Clean up engine output files + run: | + rm -f output.txt + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@v7 + env: + GITHUB_AW_AGENT_OUTPUT: /tmp/test-claude-with-cache-memory-and-custom-docker-image.log + with: + script: | + function main() { + const fs = require("fs"); + try { + // Get the log file path from environment + const logFile = process.env.GITHUB_AW_AGENT_OUTPUT; + if (!logFile) { + core.info("No agent log file specified"); + return; + } + if (!fs.existsSync(logFile)) { + core.info(`Log file not found: ${logFile}`); + return; + } + const logContent = fs.readFileSync(logFile, "utf8"); + const result = parseClaudeLog(logContent); + // Append to GitHub step summary + core.summary.addRaw(result.markdown).write(); + // Check for MCP server failures and fail the job if any occurred + if (result.mcpFailures && result.mcpFailures.length > 0) { + const failedServers = result.mcpFailures.join(", "); + core.setFailed(`MCP server(s) failed to launch: ${failedServers}`); + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + core.setFailed(errorMessage); + } + } + /** + * Parses Claude log content and converts it to markdown format + * @param {string} logContent - The raw log content as a string + * @returns {{markdown: string, mcpFailures: string[]}} Result with formatted markdown content and MCP failure list + */ + function parseClaudeLog(logContent) { + try { + const logEntries = JSON.parse(logContent); + if (!Array.isArray(logEntries)) { + return { + markdown: + "## Agent Log Summary\n\nLog format not recognized as Claude JSON array.\n", + mcpFailures: [], + }; + } + let markdown = ""; + const mcpFailures = []; + // Check for initialization data first + const initEntry = logEntries.find( + entry => entry.type === "system" && entry.subtype === "init" + ); + if (initEntry) { + markdown += "## 🚀 Initialization\n\n"; + const initResult = formatInitializationSummary(initEntry); + markdown += initResult.markdown; + mcpFailures.push(...initResult.mcpFailures); + markdown += "\n"; + } + markdown += "## 🤖 Commands and Tools\n\n"; + const toolUsePairs = new Map(); // Map tool_use_id to tool_result + const commandSummary = []; // For the succinct summary + // First pass: collect tool results by tool_use_id + for (const entry of logEntries) { + if (entry.type === "user" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "tool_result" && content.tool_use_id) { + toolUsePairs.set(content.tool_use_id, content); + } + } + } + } + // Collect all tool uses for summary + for (const entry of logEntries) { + if (entry.type === "assistant" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "tool_use") { + const toolName = content.name; + const input = content.input || {}; + // Skip internal tools - only show external commands and API calls + if ( + [ + "Read", + "Write", + "Edit", + "MultiEdit", + "LS", + "Grep", + "Glob", + "TodoWrite", + ].includes(toolName) + ) { + 
continue; // Skip internal file operations and searches + } + // Find the corresponding tool result to get status + const toolResult = toolUsePairs.get(content.id); + let statusIcon = "❓"; + if (toolResult) { + statusIcon = toolResult.is_error === true ? "❌" : "✅"; + } + // Add to command summary (only external tools) + if (toolName === "Bash") { + const formattedCommand = formatBashCommand(input.command || ""); + commandSummary.push(`* ${statusIcon} \`${formattedCommand}\``); + } else if (toolName.startsWith("mcp__")) { + const mcpName = formatMcpName(toolName); + commandSummary.push(`* ${statusIcon} \`${mcpName}(...)\``); + } else { + // Handle other external tools (if any) + commandSummary.push(`* ${statusIcon} ${toolName}`); + } + } + } + } + } + // Add command summary + if (commandSummary.length > 0) { + for (const cmd of commandSummary) { + markdown += `${cmd}\n`; + } + } else { + markdown += "No commands or tools used.\n"; + } + // Add Information section from the last entry with result metadata + markdown += "\n## 📊 Information\n\n"; + // Find the last entry with metadata + const lastEntry = logEntries[logEntries.length - 1]; + if ( + lastEntry && + (lastEntry.num_turns || + lastEntry.duration_ms || + lastEntry.total_cost_usd || + lastEntry.usage) + ) { + if (lastEntry.num_turns) { + markdown += `**Turns:** ${lastEntry.num_turns}\n\n`; + } + if (lastEntry.duration_ms) { + const durationSec = Math.round(lastEntry.duration_ms / 1000); + const minutes = Math.floor(durationSec / 60); + const seconds = durationSec % 60; + markdown += `**Duration:** ${minutes}m ${seconds}s\n\n`; + } + if (lastEntry.total_cost_usd) { + markdown += `**Total Cost:** $${lastEntry.total_cost_usd.toFixed(4)}\n\n`; + } + if (lastEntry.usage) { + const usage = lastEntry.usage; + if (usage.input_tokens || usage.output_tokens) { + markdown += `**Token Usage:**\n`; + if (usage.input_tokens) + markdown += `- Input: ${usage.input_tokens.toLocaleString()}\n`; + if 
(usage.cache_creation_input_tokens) + markdown += `- Cache Creation: ${usage.cache_creation_input_tokens.toLocaleString()}\n`; + if (usage.cache_read_input_tokens) + markdown += `- Cache Read: ${usage.cache_read_input_tokens.toLocaleString()}\n`; + if (usage.output_tokens) + markdown += `- Output: ${usage.output_tokens.toLocaleString()}\n`; + markdown += "\n"; + } + } + if ( + lastEntry.permission_denials && + lastEntry.permission_denials.length > 0 + ) { + markdown += `**Permission Denials:** ${lastEntry.permission_denials.length}\n\n`; + } + } + markdown += "\n## 🤖 Reasoning\n\n"; + // Second pass: process assistant messages in sequence + for (const entry of logEntries) { + if (entry.type === "assistant" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "text" && content.text) { + // Add reasoning text directly (no header) + const text = content.text.trim(); + if (text && text.length > 0) { + markdown += text + "\n\n"; + } + } else if (content.type === "tool_use") { + // Process tool use with its result + const toolResult = toolUsePairs.get(content.id); + const toolMarkdown = formatToolUse(content, toolResult); + if (toolMarkdown) { + markdown += toolMarkdown; + } + } + } + } + } + return { markdown, mcpFailures }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + return { + markdown: `## Agent Log Summary\n\nError parsing Claude log: ${errorMessage}\n`, + mcpFailures: [], + }; + } + } + /** + * Formats initialization information from system init entry + * @param {any} initEntry - The system init entry containing tools, mcp_servers, etc. 
+ * @returns {{markdown: string, mcpFailures: string[]}} Result with formatted markdown string and MCP failure list + */ + function formatInitializationSummary(initEntry) { + let markdown = ""; + const mcpFailures = []; + // Display model and session info + if (initEntry.model) { + markdown += `**Model:** ${initEntry.model}\n\n`; + } + if (initEntry.session_id) { + markdown += `**Session ID:** ${initEntry.session_id}\n\n`; + } + if (initEntry.cwd) { + // Show a cleaner path by removing common prefixes + const cleanCwd = initEntry.cwd.replace( + /^\/home\/runner\/work\/[^\/]+\/[^\/]+/, + "." + ); + markdown += `**Working Directory:** ${cleanCwd}\n\n`; + } + // Display MCP servers status + if (initEntry.mcp_servers && Array.isArray(initEntry.mcp_servers)) { + markdown += "**MCP Servers:**\n"; + for (const server of initEntry.mcp_servers) { + const statusIcon = + server.status === "connected" + ? "✅" + : server.status === "failed" + ? "❌" + : "❓"; + markdown += `- ${statusIcon} ${server.name} (${server.status})\n`; + // Track failed MCP servers + if (server.status === "failed") { + mcpFailures.push(server.name); + } + } + markdown += "\n"; + } + // Display tools by category + if (initEntry.tools && Array.isArray(initEntry.tools)) { + markdown += "**Available Tools:**\n"; + // Categorize tools + /** @type {{ [key: string]: string[] }} */ + const categories = { + Core: [], + "File Operations": [], + "Git/GitHub": [], + MCP: [], + Other: [], + }; + for (const tool of initEntry.tools) { + if ( + ["Task", "Bash", "BashOutput", "KillBash", "ExitPlanMode"].includes( + tool + ) + ) { + categories["Core"].push(tool); + } else if ( + [ + "Read", + "Edit", + "MultiEdit", + "Write", + "LS", + "Grep", + "Glob", + "NotebookEdit", + ].includes(tool) + ) { + categories["File Operations"].push(tool); + } else if (tool.startsWith("mcp__github__")) { + categories["Git/GitHub"].push(formatMcpName(tool)); + } else if ( + tool.startsWith("mcp__") || + ["ListMcpResourcesTool", 
"ReadMcpResourceTool"].includes(tool) + ) { + categories["MCP"].push( + tool.startsWith("mcp__") ? formatMcpName(tool) : tool + ); + } else { + categories["Other"].push(tool); + } + } + // Display categories with tools + for (const [category, tools] of Object.entries(categories)) { + if (tools.length > 0) { + markdown += `- **${category}:** ${tools.length} tools\n`; + if (tools.length <= 5) { + // Show all tools if 5 or fewer + markdown += ` - ${tools.join(", ")}\n`; + } else { + // Show first few and count + markdown += ` - ${tools.slice(0, 3).join(", ")}, and ${tools.length - 3} more\n`; + } + } + } + markdown += "\n"; + } + // Display slash commands if available + if (initEntry.slash_commands && Array.isArray(initEntry.slash_commands)) { + const commandCount = initEntry.slash_commands.length; + markdown += `**Slash Commands:** ${commandCount} available\n`; + if (commandCount <= 10) { + markdown += `- ${initEntry.slash_commands.join(", ")}\n`; + } else { + markdown += `- ${initEntry.slash_commands.slice(0, 5).join(", ")}, and ${commandCount - 5} more\n`; + } + markdown += "\n"; + } + return { markdown, mcpFailures }; + } + /** + * Formats a tool use entry with its result into markdown + * @param {any} toolUse - The tool use object containing name, input, etc. + * @param {any} toolResult - The corresponding tool result object + * @returns {string} Formatted markdown string + */ + function formatToolUse(toolUse, toolResult) { + const toolName = toolUse.name; + const input = toolUse.input || {}; + // Skip TodoWrite except the very last one (we'll handle this separately) + if (toolName === "TodoWrite") { + return ""; // Skip for now, would need global context to find the last one + } + // Helper function to determine status icon + function getStatusIcon() { + if (toolResult) { + return toolResult.is_error === true ? 
"❌" : "✅"; + } + return "❓"; // Unknown by default + } + let markdown = ""; + const statusIcon = getStatusIcon(); + switch (toolName) { + case "Bash": + const command = input.command || ""; + const description = input.description || ""; + // Format the command to be single line + const formattedCommand = formatBashCommand(command); + if (description) { + markdown += `${description}:\n\n`; + } + markdown += `${statusIcon} \`${formattedCommand}\`\n\n`; + break; + case "Read": + const filePath = input.file_path || input.path || ""; + const relativePath = filePath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); // Remove /home/runner/work/repo/repo/ prefix + markdown += `${statusIcon} Read \`${relativePath}\`\n\n`; + break; + case "Write": + case "Edit": + case "MultiEdit": + const writeFilePath = input.file_path || input.path || ""; + const writeRelativePath = writeFilePath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); + markdown += `${statusIcon} Write \`${writeRelativePath}\`\n\n`; + break; + case "Grep": + case "Glob": + const query = input.query || input.pattern || ""; + markdown += `${statusIcon} Search for \`${truncateString(query, 80)}\`\n\n`; + break; + case "LS": + const lsPath = input.path || ""; + const lsRelativePath = lsPath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); + markdown += `${statusIcon} LS: ${lsRelativePath || lsPath}\n\n`; + break; + default: + // Handle MCP calls and other tools + if (toolName.startsWith("mcp__")) { + const mcpName = formatMcpName(toolName); + const params = formatMcpParameters(input); + markdown += `${statusIcon} ${mcpName}(${params})\n\n`; + } else { + // Generic tool formatting - show the tool name and main parameters + const keys = Object.keys(input); + if (keys.length > 0) { + // Try to find the most important parameter + const mainParam = + keys.find(k => + ["query", "command", "path", "file_path", "content"].includes(k) + ) || keys[0]; + const value = String(input[mainParam] || 
""); + if (value) { + markdown += `${statusIcon} ${toolName}: ${truncateString(value, 100)}\n\n`; + } else { + markdown += `${statusIcon} ${toolName}\n\n`; + } + } else { + markdown += `${statusIcon} ${toolName}\n\n`; + } + } + } + return markdown; + } + /** + * Formats MCP tool name from internal format to display format + * @param {string} toolName - The raw tool name (e.g., mcp__github__search_issues) + * @returns {string} Formatted tool name (e.g., github::search_issues) + */ + function formatMcpName(toolName) { + // Convert mcp__github__search_issues to github::search_issues + if (toolName.startsWith("mcp__")) { + const parts = toolName.split("__"); + if (parts.length >= 3) { + const provider = parts[1]; // github, etc. + const method = parts.slice(2).join("_"); // search_issues, etc. + return `${provider}::${method}`; + } + } + return toolName; + } + /** + * Formats MCP parameters into a human-readable string + * @param {Record} input - The input object containing parameters + * @returns {string} Formatted parameters string + */ + function formatMcpParameters(input) { + const keys = Object.keys(input); + if (keys.length === 0) return ""; + const paramStrs = []; + for (const key of keys.slice(0, 4)) { + // Show up to 4 parameters + const value = String(input[key] || ""); + paramStrs.push(`${key}: ${truncateString(value, 40)}`); + } + if (keys.length > 4) { + paramStrs.push("..."); + } + return paramStrs.join(", "); + } + /** + * Formats a bash command by normalizing whitespace and escaping + * @param {string} command - The raw bash command string + * @returns {string} Formatted and escaped command string + */ + function formatBashCommand(command) { + if (!command) return ""; + // Convert multi-line commands to single line by replacing newlines with spaces + // and collapsing multiple spaces + let formatted = command + .replace(/\n/g, " ") // Replace newlines with spaces + .replace(/\r/g, " ") // Replace carriage returns with spaces + .replace(/\t/g, " ") // 
Replace tabs with spaces + .replace(/\s+/g, " ") // Collapse multiple spaces into one + .trim(); // Remove leading/trailing whitespace + // Escape backticks to prevent markdown issues + formatted = formatted.replace(/`/g, "\\`"); + // Truncate if too long (keep reasonable length for summary) + const maxLength = 80; + if (formatted.length > maxLength) { + formatted = formatted.substring(0, maxLength) + "..."; + } + return formatted; + } + /** + * Truncates a string to a maximum length with ellipsis + * @param {string} str - The string to truncate + * @param {number} maxLength - Maximum allowed length + * @returns {string} Truncated string with ellipsis if needed + */ + function truncateString(str, maxLength) { + if (!str) return ""; + if (str.length <= maxLength) return str; + return str.substring(0, maxLength) + "..."; + } + // Export for testing + if (typeof module !== "undefined" && module.exports) { + module.exports = { + parseClaudeLog, + formatToolUse, + formatInitializationSummary, + formatBashCommand, + truncateString, + }; + } + main(); + - name: Upload agent logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-claude-with-cache-memory-and-custom-docker-image.log + path: /tmp/test-claude-with-cache-memory-and-custom-docker-image.log + if-no-files-found: warn + diff --git a/pkg/cli/workflows/test-claude-cache-memory.md b/pkg/cli/workflows/test-claude-cache-memory.md new file mode 100644 index 00000000000..75ce2520025 --- /dev/null +++ b/pkg/cli/workflows/test-claude-cache-memory.md @@ -0,0 +1,58 @@ +--- +engine: claude +on: + workflow_dispatch: + inputs: + task: + description: 'Task to remember' + required: true + default: 'Store this information for later' + +tools: + cache-memory: + docker-image: "ghcr.io/modelcontextprotocol/server-memory:v1.0.0" + retention-days: 14 + github: + allowed: [get_repository] + +timeout_minutes: 5 +--- + +# Test Claude with Cache Memory and Custom Docker Image + +You are a test agent that demonstrates 
the cache-memory functionality with Claude engine using a custom Docker image. + +## Task + +Your job is to: + +1. **Store a test task** in your memory using the memory MCP server +2. **Retrieve any previous tasks** that you've stored in memory +3. **Report on the memory contents** including both current and historical tasks +4. **Use GitHub tools** to get basic repository information + +## Instructions + +1. First, use the memory tool to see what you already know from previous runs +2. Store a new test task: "Test task for run ${{ github.run_number }}" in your memory +3. List all tasks you now have in memory +4. Get basic information about this repository using the GitHub tool +5. Provide a summary of: + - What you remembered from before + - What you just stored + - Basic repository information + +## Expected Behavior + +- **First run**: Should show empty memory, then store the new task +- **Subsequent runs**: Should show previously stored tasks, then add the new one +- **Memory persistence**: Tasks should persist across workflow runs thanks to cache-memory +- **Custom Docker image**: Uses ghcr.io/modelcontextprotocol/server-memory:v1.0.0 instead of latest +- **Artifact upload**: Memory data is also uploaded as artifact with 14-day retention + +This workflow tests that the cache-memory configuration properly: +- Mounts the memory MCP server with custom Docker image +- Persists data between runs using GitHub Actions cache +- Uploads memory data as artifacts with configurable retention +- Works with Claude engine and MCP tools +- Integrates with other tools like GitHub \ No newline at end of file diff --git a/pkg/cli/workflows/test-claude-missing-tool.lock.yml b/pkg/cli/workflows/test-claude-missing-tool.lock.yml new file mode 100644 index 00000000000..10bfdb5ded1 --- /dev/null +++ b/pkg/cli/workflows/test-claude-missing-tool.lock.yml @@ -0,0 +1,1863 @@ +# This file was automatically generated by gh-aw. DO NOT EDIT. 
+# To update this file, edit the corresponding .md file and run: +# gh aw compile + +name: "Test Claude with Missing Tool Safe Output and Cache Memory" +on: + workflow_dispatch: + inputs: + missing_tool: + default: example-missing-tool + description: Tool to report as missing + required: true + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Test Claude with Missing Tool Safe Output and Cache Memory" + +jobs: + test-claude-with-missing-tool-safe-output-and-cache-memory: + runs-on: ubuntu-latest + permissions: read-all + outputs: + output: ${{ steps.collect_output.outputs.output }} + steps: + - name: Checkout repository + uses: actions/checkout@v5 + # Cache memory MCP configuration from frontmatter processed below + - name: Create cache-memory directory + run: mkdir -p /tmp/cache-memory + - name: Cache memory MCP data + uses: actions/cache@v4 + with: + key: memory-${{ github.workflow }}-${{ github.run_id }} + path: /tmp/cache-memory + restore-keys: | + memory-${{ github.workflow }}- + memory- + - name: Generate Claude Settings + run: | + mkdir -p /tmp/.claude + cat > /tmp/.claude/settings.json << 'EOF' + { + "hooks": { + "PreToolUse": [ + { + "matcher": "WebFetch|WebSearch", + "hooks": [ + { + "type": "command", + "command": ".claude/hooks/network_permissions.py" + } + ] + } + ] + } + } + EOF + - name: Generate Network Permissions Hook + run: | + mkdir -p .claude/hooks + cat > .claude/hooks/network_permissions.py << 'EOF' + #!/usr/bin/env python3 + """ + Network permissions validator for Claude Code engine. + Generated by gh-aw from engine network permissions configuration. 
+ """ + + import json + import sys + import urllib.parse + import re + + # Domain allow-list (populated during generation) + ALLOWED_DOMAINS = ["crl3.digicert.com","crl4.digicert.com","ocsp.digicert.com","ts-crl.ws.symantec.com","ts-ocsp.ws.symantec.com","crl.geotrust.com","ocsp.geotrust.com","crl.thawte.com","ocsp.thawte.com","crl.verisign.com","ocsp.verisign.com","crl.globalsign.com","ocsp.globalsign.com","crls.ssl.com","ocsp.ssl.com","crl.identrust.com","ocsp.identrust.com","crl.sectigo.com","ocsp.sectigo.com","crl.usertrust.com","ocsp.usertrust.com","s.symcb.com","s.symcd.com","json-schema.org","json.schemastore.org","archive.ubuntu.com","security.ubuntu.com","ppa.launchpad.net","keyserver.ubuntu.com","azure.archive.ubuntu.com","api.snapcraft.io","packagecloud.io","packages.cloud.google.com","packages.microsoft.com"] + + def extract_domain(url_or_query): + """Extract domain from URL or search query.""" + if not url_or_query: + return None + + if url_or_query.startswith(('http://', 'https://')): + return urllib.parse.urlparse(url_or_query).netloc.lower() + + # Check for domain patterns in search queries + match = re.search(r'site:([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', url_or_query) + if match: + return match.group(1).lower() + + return None + + def is_domain_allowed(domain): + """Check if domain is allowed.""" + if not domain: + # If no domain detected, allow only if not under deny-all policy + return bool(ALLOWED_DOMAINS) # False if empty list (deny-all), True if has domains + + # Empty allowed domains means deny all + if not ALLOWED_DOMAINS: + return False + + for pattern in ALLOWED_DOMAINS: + regex = pattern.replace('.', r'\.').replace('*', '.*') + if re.match(f'^{regex}$', domain): + return True + return False + + # Main logic + try: + data = json.load(sys.stdin) + tool_name = data.get('tool_name', '') + tool_input = data.get('tool_input', {}) + + if tool_name not in ['WebFetch', 'WebSearch']: + sys.exit(0) # Allow other tools + + target = tool_input.get('url') or 
tool_input.get('query', '') + domain = extract_domain(target) + + # For WebSearch, apply domain restrictions consistently + # If no domain detected in search query, check if restrictions are in place + if tool_name == 'WebSearch' and not domain: + # Since this hook is only generated when network permissions are configured, + # empty ALLOWED_DOMAINS means deny-all policy + if not ALLOWED_DOMAINS: # Empty list means deny all + print(f"Network access blocked: deny-all policy in effect", file=sys.stderr) + print(f"No domains are allowed for WebSearch", file=sys.stderr) + sys.exit(2) # Block under deny-all policy + else: + print(f"Network access blocked for web-search: no specific domain detected", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block general searches when domain allowlist is configured + + if not is_domain_allowed(domain): + print(f"Network access blocked for domain: {domain}", file=sys.stderr) + print(f"Allowed domains: {', '.join(ALLOWED_DOMAINS)}", file=sys.stderr) + sys.exit(2) # Block with feedback to Claude + + sys.exit(0) # Allow + + except Exception as e: + print(f"Network validation error: {e}", file=sys.stderr) + sys.exit(2) # Block on errors + + EOF + chmod +x .claude/hooks/network_permissions.py + - name: Setup agent output + id: setup_agent_output + uses: actions/github-script@v7 + with: + script: | + function main() { + const fs = require("fs"); + const crypto = require("crypto"); + // Generate a random filename for the output file + const randomId = crypto.randomBytes(8).toString("hex"); + const outputFile = `/tmp/aw_output_${randomId}.txt`; + // Ensure the /tmp directory exists + fs.mkdirSync("/tmp", { recursive: true }); + // We don't create the file, as the name is sufficiently random + // and some engines (Claude) fails first Write to the file + // if it exists and has not been read. 
+ // Set the environment variable for subsequent steps + core.exportVariable("GITHUB_AW_SAFE_OUTPUTS", outputFile); + // Also set as step output for reference + core.setOutput("output_file", outputFile); + } + main(); + - name: Setup MCPs + run: | + mkdir -p /tmp/mcp-config + cat > /tmp/mcp-config/mcp-servers.json << 'EOF' + { + "mcpServers": { + "memory": { + "command": "npx", + "args": [ + "@modelcontextprotocol/server-memory" + ], + "env": { + "MEMORY_FILE_PATH": "/tmp/cache-memory/memory.json" + } + }, + "github": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "GITHUB_PERSONAL_ACCESS_TOKEN", + "ghcr.io/github/github-mcp-server:sha-09deac4" + ], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${{ secrets.GITHUB_TOKEN }}" + } + } + } + } + EOF + - name: Create prompt + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + run: | + mkdir -p /tmp/aw-prompts + cat > $GITHUB_AW_PROMPT << 'EOF' + # Test Claude with Missing Tool Safe Output and Cache Memory + + You are a test agent that demonstrates the missing-tool safe output functionality with Claude engine, enhanced with persistent memory. + + ## Task + + Your job is to: + + 1. **Check your memory** for any previous missing tool reports + 2. **Report a missing tool** using the safe output functionality + 3. **Store the report in memory** for future reference + 4. **Use GitHub tools** to get basic repository information + + ## Instructions + + 1. First, check your memory to see if you've reported any missing tools before + 2. Report that the tool specified in the input (${{ github.event.inputs.missing_tool }}) is missing + 3. 
Use the safe output functionality to properly report the missing tool with: + - Tool name: ${{ github.event.inputs.missing_tool }} + - Reason: "This tool is not available in the current workflow environment" + - Alternatives: "Consider using alternative tools or requesting tool addition" + - Context: "Testing missing-tool safe output with Claude engine" + 4. Store this report in your memory for future reference + 5. Get basic information about this repository using the GitHub tool + 6. Provide a summary of your actions + + ## Expected Behavior + + - **Memory persistence**: Should remember previous missing tool reports across runs + - **Safe output**: Should generate proper missing-tool JSON output + - **Tool integration**: Should work with GitHub tools despite the "missing" tool + - **Staged mode**: Should prevent actual GitHub interactions when staged: true + + This workflow tests that: + - Claude can properly report missing tools using safe outputs + - Cache-memory preserves missing tool reports across runs + - Integration works with existing tools like GitHub + - Safe output system handles missing tool scenarios correctly + + + --- + + ## Reporting Missing Tools or Functionality + + **IMPORTANT**: To do the actions mentioned in the header of this section, do NOT attempt to use MCP tools, do NOT attempt to use `gh`, do NOT attempt to use the GitHub API. You don't have write access to the GitHub repo. Instead write JSON objects to the file "${{ env.GITHUB_AW_SAFE_OUTPUTS }}". Each line should contain a single JSON object (JSONL format). You can write them one by one as you do them. + + **Format**: Write one JSON object per line. Each object must have a `type` field specifying the action type. + + ### Available Output Types: + + **Reporting Missing Tools or Functionality** + + If you need to use a tool or functionality that is not available to complete your task: + 1. 
Append an entry on a new line to the file "${{ env.GITHUB_AW_SAFE_OUTPUTS }}": + ```json + {"type": "missing-tool", "tool": "tool-name", "reason": "Why this tool is needed", "alternatives": "Suggested alternatives or workarounds"} + ``` + 2. The `tool` field should specify the name or type of missing functionality + 3. The `reason` field should explain why this tool/functionality is required to complete the task + 4. The `alternatives` field is optional but can suggest workarounds or alternative approaches + 5. After you write to that file, read it back and check it is valid, see below. + + **Example JSONL file content:** + ``` + {"type": "missing-tool", "tool": "docker", "reason": "Need Docker to build container images", "alternatives": "Could use GitHub Actions build instead"} + ``` + + **Important Notes:** + - Do NOT attempt to use MCP tools, `gh`, or the GitHub API for these actions + - Each JSON object must be on its own line + - Only include output types that are configured for this workflow + - The content of this file will be automatically processed and executed + - After you write or append to "${{ env.GITHUB_AW_SAFE_OUTPUTS }}", read it back as JSONL and check it is valid. Make sure it actually puts multiple entries on different lines rather than trying to separate entries on one line with the text "\n" - we've seen you make this mistake before, be careful! Maybe run a bash script to check the validity of the JSONL line by line if you have access to bash. 
If there are any problems with the JSONL make any necessary corrections to it to fix it up + + EOF + - name: Print prompt to step summary + run: | + echo "## Generated Prompt" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '``````markdown' >> $GITHUB_STEP_SUMMARY + cat $GITHUB_AW_PROMPT >> $GITHUB_STEP_SUMMARY + echo '``````' >> $GITHUB_STEP_SUMMARY + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + - name: Generate agentic run info + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "claude", + engine_name: "Claude Code", + model: "", + version: "", + workflow_name: "Test Claude with Missing Tool Safe Output and Cache Memory", + experimental: false, + supports_tools_whitelist: true, + supports_http_transport: true, + run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: true, + created_at: new Date().toISOString() + }; + + // Write to /tmp directory to avoid inclusion in PR + const tmpPath = '/tmp/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + - name: Upload agentic run info + if: always() + uses: actions/upload-artifact@v4 + with: + name: aw_info.json + path: /tmp/aw_info.json + if-no-files-found: warn + - name: Execute Claude Code Action + id: agentic_execution + uses: anthropics/claude-code-base-action@v0.0.56 + with: + # Allowed tools (sorted): + # - ExitPlanMode + # - Glob + # - Grep + # - LS + # - NotebookRead + # - Read + # - Task + # - TodoWrite + # - Write + # - mcp__github__download_workflow_run_artifact + # - mcp__github__get_code_scanning_alert + # - mcp__github__get_commit + # - mcp__github__get_dependabot_alert + # - 
mcp__github__get_discussion + # - mcp__github__get_discussion_comments + # - mcp__github__get_file_contents + # - mcp__github__get_issue + # - mcp__github__get_issue_comments + # - mcp__github__get_job_logs + # - mcp__github__get_me + # - mcp__github__get_notification_details + # - mcp__github__get_pull_request + # - mcp__github__get_pull_request_comments + # - mcp__github__get_pull_request_diff + # - mcp__github__get_pull_request_files + # - mcp__github__get_pull_request_reviews + # - mcp__github__get_pull_request_status + # - mcp__github__get_repository + # - mcp__github__get_secret_scanning_alert + # - mcp__github__get_tag + # - mcp__github__get_workflow_run + # - mcp__github__get_workflow_run_logs + # - mcp__github__get_workflow_run_usage + # - mcp__github__list_branches + # - mcp__github__list_code_scanning_alerts + # - mcp__github__list_commits + # - mcp__github__list_dependabot_alerts + # - mcp__github__list_discussion_categories + # - mcp__github__list_discussions + # - mcp__github__list_issues + # - mcp__github__list_notifications + # - mcp__github__list_pull_requests + # - mcp__github__list_secret_scanning_alerts + # - mcp__github__list_tags + # - mcp__github__list_workflow_jobs + # - mcp__github__list_workflow_run_artifacts + # - mcp__github__list_workflow_runs + # - mcp__github__list_workflows + # - mcp__github__search_code + # - mcp__github__search_issues + # - mcp__github__search_orgs + # - mcp__github__search_pull_requests + # - mcp__github__search_repositories + # - mcp__github__search_users + # - mcp__memory + allowed_tools: 
"ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,Write,mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_issue,mcp__github__get_issue_comments,mcp__github__get_job_logs,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_repository,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issues,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_secret_scanning_alerts,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users,mcp__memory" + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + claude_env: | + GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + GITHUB_AW_SAFE_OUTPUTS_STAGED: "true" + mcp_config: /tmp/mcp-config/mcp-servers.json + prompt_file: /tmp/aw-prompts/prompt.txt + settings: /tmp/.claude/settings.json + timeout_minutes: 5 + env: + GITHUB_AW_PROMPT: /tmp/aw-prompts/prompt.txt + GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + - name: Capture Agentic Action logs + if: always() + 
run: | + # Copy the detailed execution file from Agentic Action if available + if [ -n "${{ steps.agentic_execution.outputs.execution_file }}" ] && [ -f "${{ steps.agentic_execution.outputs.execution_file }}" ]; then + cp ${{ steps.agentic_execution.outputs.execution_file }} /tmp/test-claude-with-missing-tool-safe-output-and-cache-memory.log + else + echo "No execution file output found from Agentic Action" >> /tmp/test-claude-with-missing-tool-safe-output-and-cache-memory.log + fi + + # Ensure log file exists + touch /tmp/test-claude-with-missing-tool-safe-output-and-cache-memory.log + - name: Print Agent output + env: + GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + run: | + echo "## Agent Output (JSONL)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '``````json' >> $GITHUB_STEP_SUMMARY + if [ -f ${{ env.GITHUB_AW_SAFE_OUTPUTS }} ]; then + cat ${{ env.GITHUB_AW_SAFE_OUTPUTS }} >> $GITHUB_STEP_SUMMARY + # Ensure there's a newline after the file content if it doesn't end with one + if [ -s ${{ env.GITHUB_AW_SAFE_OUTPUTS }} ] && [ "$(tail -c1 ${{ env.GITHUB_AW_SAFE_OUTPUTS }})" != "" ]; then + echo "" >> $GITHUB_STEP_SUMMARY + fi + else + echo "No agent output file found" >> $GITHUB_STEP_SUMMARY + fi + echo '``````' >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + - name: Upload agentic output file + if: always() + uses: actions/upload-artifact@v4 + with: + name: safe_output.jsonl + path: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + if-no-files-found: warn + - name: Ingest agent output + id: collect_output + uses: actions/github-script@v7 + env: + GITHUB_AW_SAFE_OUTPUTS: ${{ env.GITHUB_AW_SAFE_OUTPUTS }} + GITHUB_AW_SAFE_OUTPUTS_CONFIG: "{\"missing-tool\":{\"enabled\":true,\"max\":5}}" + with: + script: | + async function main() { + const fs = require("fs"); + /** + * Sanitizes content for safe output in GitHub Actions + * @param {string} content - The content to sanitize + * @returns {string} The sanitized content + */ + function 
sanitizeContent(content) { + if (!content || typeof content !== "string") { + return ""; + } + // Read allowed domains from environment variable + const allowedDomainsEnv = process.env.GITHUB_AW_ALLOWED_DOMAINS; + const defaultAllowedDomains = [ + "github.com", + "github.io", + "githubusercontent.com", + "githubassets.com", + "github.dev", + "codespaces.new", + ]; + const allowedDomains = allowedDomainsEnv + ? allowedDomainsEnv + .split(",") + .map(d => d.trim()) + .filter(d => d) + : defaultAllowedDomains; + let sanitized = content; + // Neutralize @mentions to prevent unintended notifications + sanitized = neutralizeMentions(sanitized); + // Remove control characters (except newlines and tabs) + sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); + // XML character escaping + sanitized = sanitized + .replace(/&/g, "&") // Must be first to avoid double-escaping + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + // URI filtering - replace non-https protocols with "(redacted)" + sanitized = sanitizeUrlProtocols(sanitized); + // Domain filtering for HTTPS URIs + sanitized = sanitizeUrlDomains(sanitized); + // Limit total length to prevent DoS (0.5MB max) + const maxLength = 524288; + if (sanitized.length > maxLength) { + sanitized = + sanitized.substring(0, maxLength) + + "\n[Content truncated due to length]"; + } + // Limit number of lines to prevent log flooding (65k max) + const lines = sanitized.split("\n"); + const maxLines = 65000; + if (lines.length > maxLines) { + sanitized = + lines.slice(0, maxLines).join("\n") + + "\n[Content truncated due to line count]"; + } + // Remove ANSI escape sequences + sanitized = sanitized.replace(/\x1b\[[0-9;]*[mGKH]/g, ""); + // Neutralize common bot trigger phrases + sanitized = neutralizeBotTriggers(sanitized); + // Trim excessive whitespace + return sanitized.trim(); + /** + * Remove unknown domains + * @param {string} s - The string to process + * @returns {string} The string with 
unknown domains redacted + */ + function sanitizeUrlDomains(s) { + return s.replace( + /\bhttps:\/\/([^\/\s\])}'"<>&\x00-\x1f]+)/gi, + (match, domain) => { + // Extract the hostname part (before first slash, colon, or other delimiter) + const hostname = domain.split(/[\/:\?#]/)[0].toLowerCase(); + // Check if this domain or any parent domain is in the allowlist + const isAllowed = allowedDomains.some(allowedDomain => { + const normalizedAllowed = allowedDomain.toLowerCase(); + return ( + hostname === normalizedAllowed || + hostname.endsWith("." + normalizedAllowed) + ); + }); + return isAllowed ? match : "(redacted)"; + } + ); + } + /** + * Remove unknown protocols except https + * @param {string} s - The string to process + * @returns {string} The string with non-https protocols redacted + */ + function sanitizeUrlProtocols(s) { + // Match both protocol:// and protocol: patterns + return s.replace( + /\b(\w+):(?:\/\/)?[^\s\])}'"<>&\x00-\x1f]+/gi, + (match, protocol) => { + // Allow https (case insensitive), redact everything else + return protocol.toLowerCase() === "https" ? match : "(redacted)"; + } + ); + } + /** + * Neutralizes @mentions by wrapping them in backticks + * @param {string} s - The string to process + * @returns {string} The string with neutralized mentions + */ + function neutralizeMentions(s) { + // Replace @name or @org/team outside code with `@name` + return s.replace( + /(^|[^\w`])@([A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?(?:\/[A-Za-z0-9._-]+)?)/g, + (_m, p1, p2) => `${p1}\`@${p2}\`` + ); + } + /** + * Neutralizes bot trigger phrases by wrapping them in backticks + * @param {string} s - The string to process + * @returns {string} The string with neutralized bot triggers + */ + function neutralizeBotTriggers(s) { + // Neutralize common bot trigger phrases like "fixes #123", "closes #asdfs", etc. 
+ return s.replace( + /\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, + (match, action, ref) => `\`${action} #${ref}\`` + ); + } + } + /** + * Gets the maximum allowed count for a given output type + * @param {string} itemType - The output item type + * @param {any} config - The safe-outputs configuration + * @returns {number} The maximum allowed count + */ + function getMaxAllowedForType(itemType, config) { + // Check if max is explicitly specified in config + if ( + config && + config[itemType] && + typeof config[itemType] === "object" && + config[itemType].max + ) { + return config[itemType].max; + } + // Use default limits for plural-supported types + switch (itemType) { + case "create-issue": + return 1; // Only one issue allowed + case "add-issue-comment": + return 1; // Only one comment allowed + case "create-pull-request": + return 1; // Only one pull request allowed + case "create-pull-request-review-comment": + return 10; // Default to 10 review comments allowed + case "add-issue-label": + return 5; // Only one labels operation allowed + case "update-issue": + return 1; // Only one issue update allowed + case "push-to-pr-branch": + return 1; // Only one push to branch allowed + case "create-discussion": + return 1; // Only one discussion allowed + case "missing-tool": + return 1000; // Allow many missing tool reports (default: unlimited) + case "create-code-scanning-alert": + return 1000; // Allow many repository security advisories (default: unlimited) + default: + return 1; // Default to single item for unknown types + } + } + /** + * Attempts to repair common JSON syntax issues in LLM-generated content + * @param {string} jsonStr - The potentially malformed JSON string + * @returns {string} The repaired JSON string + */ + function repairJson(jsonStr) { + let repaired = jsonStr.trim(); + // remove invalid control characters like + // U+0014 (DC4) — represented here as "\u0014" + // Escape control characters not allowed in JSON strings 
(U+0000 through U+001F) + // Preserve common JSON escapes for \b, \f, \n, \r, \t and use \uXXXX for the rest. + /** @type {Record} */ + const _ctrl = { 8: "\\b", 9: "\\t", 10: "\\n", 12: "\\f", 13: "\\r" }; + repaired = repaired.replace(/[\u0000-\u001F]/g, ch => { + const c = ch.charCodeAt(0); + return _ctrl[c] || "\\u" + c.toString(16).padStart(4, "0"); + }); + // Fix single quotes to double quotes (must be done first) + repaired = repaired.replace(/'/g, '"'); + // Fix missing quotes around object keys + repaired = repaired.replace( + /([{,]\s*)([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, + '$1"$2":' + ); + // Fix newlines and tabs inside strings by escaping them + repaired = repaired.replace(/"([^"\\]*)"/g, (match, content) => { + if ( + content.includes("\n") || + content.includes("\r") || + content.includes("\t") + ) { + const escaped = content + .replace(/\\/g, "\\\\") + .replace(/\n/g, "\\n") + .replace(/\r/g, "\\r") + .replace(/\t/g, "\\t"); + return `"${escaped}"`; + } + return match; + }); + // Fix unescaped quotes inside string values + repaired = repaired.replace( + /"([^"]*)"([^":,}\]]*)"([^"]*)"(\s*[,:}\]])/g, + (match, p1, p2, p3, p4) => `"${p1}\\"${p2}\\"${p3}"${p4}` + ); + // Fix wrong bracket/brace types - arrays should end with ] not } + repaired = repaired.replace( + /(\[\s*(?:"[^"]*"(?:\s*,\s*"[^"]*")*\s*),?)\s*}/g, + "$1]" + ); + // Fix missing closing braces/brackets + const openBraces = (repaired.match(/\{/g) || []).length; + const closeBraces = (repaired.match(/\}/g) || []).length; + if (openBraces > closeBraces) { + repaired += "}".repeat(openBraces - closeBraces); + } else if (closeBraces > openBraces) { + repaired = "{".repeat(closeBraces - openBraces) + repaired; + } + // Fix missing closing brackets for arrays + const openBrackets = (repaired.match(/\[/g) || []).length; + const closeBrackets = (repaired.match(/\]/g) || []).length; + if (openBrackets > closeBrackets) { + repaired += "]".repeat(openBrackets - closeBrackets); + } else if 
(closeBrackets > openBrackets) { + repaired = "[".repeat(closeBrackets - openBrackets) + repaired; + } + // Fix trailing commas in objects and arrays (AFTER fixing brackets/braces) + repaired = repaired.replace(/,(\s*[}\]])/g, "$1"); + return repaired; + } + /** + * Attempts to parse JSON with repair fallback + * @param {string} jsonStr - The JSON string to parse + * @returns {Object|undefined} The parsed JSON object, or undefined if parsing fails + */ + function parseJsonWithRepair(jsonStr) { + try { + // First, try normal JSON.parse + return JSON.parse(jsonStr); + } catch (originalError) { + try { + // If that fails, try repairing and parsing again + const repairedJson = repairJson(jsonStr); + return JSON.parse(repairedJson); + } catch (repairError) { + // If repair also fails, throw the error + core.info(`invalid input json: ${jsonStr}`); + const originalMsg = + originalError instanceof Error + ? originalError.message + : String(originalError); + const repairMsg = + repairError instanceof Error + ? repairError.message + : String(repairError); + throw new Error( + `JSON parsing failed. Original: ${originalMsg}. 
After attempted repair: ${repairMsg}` + ); + } + } + } + const outputFile = process.env.GITHUB_AW_SAFE_OUTPUTS; + const safeOutputsConfig = process.env.GITHUB_AW_SAFE_OUTPUTS_CONFIG; + if (!outputFile) { + core.info("GITHUB_AW_SAFE_OUTPUTS not set, no output to collect"); + core.setOutput("output", ""); + return; + } + if (!fs.existsSync(outputFile)) { + core.info(`Output file does not exist: ${outputFile}`); + core.setOutput("output", ""); + return; + } + const outputContent = fs.readFileSync(outputFile, "utf8"); + if (outputContent.trim() === "") { + core.info("Output file is empty"); + core.setOutput("output", ""); + return; + } + core.info(`Raw output content length: ${outputContent.length}`); + // Parse the safe-outputs configuration + /** @type {any} */ + let expectedOutputTypes = {}; + if (safeOutputsConfig) { + try { + expectedOutputTypes = JSON.parse(safeOutputsConfig); + core.info( + `Expected output types: ${JSON.stringify(Object.keys(expectedOutputTypes))}` + ); + } catch (error) { + const errorMsg = error instanceof Error ? 
error.message : String(error); + core.info(`Warning: Could not parse safe-outputs config: ${errorMsg}`); + } + } + // Parse JSONL content + const lines = outputContent.trim().split("\n"); + const parsedItems = []; + const errors = []; + for (let i = 0; i < lines.length; i++) { + const line = lines[i].trim(); + if (line === "") continue; // Skip empty lines + try { + /** @type {any} */ + const item = parseJsonWithRepair(line); + // If item is undefined (failed to parse), add error and process next line + if (item === undefined) { + errors.push(`Line ${i + 1}: Invalid JSON - JSON parsing failed`); + continue; + } + // Validate that the item has a 'type' field + if (!item.type) { + errors.push(`Line ${i + 1}: Missing required 'type' field`); + continue; + } + // Validate against expected output types + const itemType = item.type; + if (!expectedOutputTypes[itemType]) { + errors.push( + `Line ${i + 1}: Unexpected output type '${itemType}'. Expected one of: ${Object.keys(expectedOutputTypes).join(", ")}` + ); + continue; + } + // Check for too many items of the same type + const typeCount = parsedItems.filter( + existing => existing.type === itemType + ).length; + const maxAllowed = getMaxAllowedForType(itemType, expectedOutputTypes); + if (typeCount >= maxAllowed) { + errors.push( + `Line ${i + 1}: Too many items of type '${itemType}'. 
Maximum allowed: ${maxAllowed}.` + ); + continue; + } + // Basic validation based on type + switch (itemType) { + case "create-issue": + if (!item.title || typeof item.title !== "string") { + errors.push( + `Line ${i + 1}: create-issue requires a 'title' string field` + ); + continue; + } + if (!item.body || typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: create-issue requires a 'body' string field` + ); + continue; + } + // Sanitize text content + item.title = sanitizeContent(item.title); + item.body = sanitizeContent(item.body); + // Sanitize labels if present + if (item.labels && Array.isArray(item.labels)) { + item.labels = item.labels.map( + /** @param {any} label */ label => + typeof label === "string" ? sanitizeContent(label) : label + ); + } + break; + case "add-issue-comment": + if (!item.body || typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: add-issue-comment requires a 'body' string field` + ); + continue; + } + // Sanitize text content + item.body = sanitizeContent(item.body); + break; + case "create-pull-request": + if (!item.title || typeof item.title !== "string") { + errors.push( + `Line ${i + 1}: create-pull-request requires a 'title' string field` + ); + continue; + } + if (!item.body || typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: create-pull-request requires a 'body' string field` + ); + continue; + } + // Sanitize text content + item.title = sanitizeContent(item.title); + item.body = sanitizeContent(item.body); + // Sanitize branch name if present + if (item.branch && typeof item.branch === "string") { + item.branch = sanitizeContent(item.branch); + } + // Sanitize labels if present + if (item.labels && Array.isArray(item.labels)) { + item.labels = item.labels.map( + /** @param {any} label */ label => + typeof label === "string" ? 
sanitizeContent(label) : label + ); + } + break; + case "add-issue-label": + if (!item.labels || !Array.isArray(item.labels)) { + errors.push( + `Line ${i + 1}: add-issue-label requires a 'labels' array field` + ); + continue; + } + if ( + item.labels.some( + /** @param {any} label */ label => typeof label !== "string" + ) + ) { + errors.push( + `Line ${i + 1}: add-issue-label labels array must contain only strings` + ); + continue; + } + // Sanitize label strings + item.labels = item.labels.map( + /** @param {any} label */ label => sanitizeContent(label) + ); + break; + case "update-issue": + // Check that at least one updateable field is provided + const hasValidField = + item.status !== undefined || + item.title !== undefined || + item.body !== undefined; + if (!hasValidField) { + errors.push( + `Line ${i + 1}: update-issue requires at least one of: 'status', 'title', or 'body' fields` + ); + continue; + } + // Validate status if provided + if (item.status !== undefined) { + if ( + typeof item.status !== "string" || + (item.status !== "open" && item.status !== "closed") + ) { + errors.push( + `Line ${i + 1}: update-issue 'status' must be 'open' or 'closed'` + ); + continue; + } + } + // Validate title if provided + if (item.title !== undefined) { + if (typeof item.title !== "string") { + errors.push( + `Line ${i + 1}: update-issue 'title' must be a string` + ); + continue; + } + item.title = sanitizeContent(item.title); + } + // Validate body if provided + if (item.body !== undefined) { + if (typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: update-issue 'body' must be a string` + ); + continue; + } + item.body = sanitizeContent(item.body); + } + // Validate issue_number if provided (for target "*") + if (item.issue_number !== undefined) { + if ( + typeof item.issue_number !== "number" && + typeof item.issue_number !== "string" + ) { + errors.push( + `Line ${i + 1}: update-issue 'issue_number' must be a number or string` + ); + continue; + } + } 
+ break; + case "push-to-pr-branch": + // Validate message if provided (optional) + if (item.message !== undefined) { + if (typeof item.message !== "string") { + errors.push( + `Line ${i + 1}: push-to-pr-branch 'message' must be a string` + ); + continue; + } + item.message = sanitizeContent(item.message); + } + // Validate pull_request_number if provided (for target "*") + if (item.pull_request_number !== undefined) { + if ( + typeof item.pull_request_number !== "number" && + typeof item.pull_request_number !== "string" + ) { + errors.push( + `Line ${i + 1}: push-to-pr-branch 'pull_request_number' must be a number or string` + ); + continue; + } + } + break; + case "create-pull-request-review-comment": + // Validate required path field + if (!item.path || typeof item.path !== "string") { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment requires a 'path' string field` + ); + continue; + } + // Validate required line field + if ( + item.line === undefined || + (typeof item.line !== "number" && typeof item.line !== "string") + ) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment requires a 'line' number or string field` + ); + continue; + } + // Validate line is a positive integer + const lineNumber = + typeof item.line === "string" ? 
parseInt(item.line, 10) : item.line; + if ( + isNaN(lineNumber) || + lineNumber <= 0 || + !Number.isInteger(lineNumber) + ) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment 'line' must be a positive integer` + ); + continue; + } + // Validate required body field + if (!item.body || typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment requires a 'body' string field` + ); + continue; + } + // Sanitize required text content + item.body = sanitizeContent(item.body); + // Validate optional start_line field + if (item.start_line !== undefined) { + if ( + typeof item.start_line !== "number" && + typeof item.start_line !== "string" + ) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment 'start_line' must be a number or string` + ); + continue; + } + const startLineNumber = + typeof item.start_line === "string" + ? parseInt(item.start_line, 10) + : item.start_line; + if ( + isNaN(startLineNumber) || + startLineNumber <= 0 || + !Number.isInteger(startLineNumber) + ) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment 'start_line' must be a positive integer` + ); + continue; + } + if (startLineNumber > lineNumber) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment 'start_line' must be less than or equal to 'line'` + ); + continue; + } + } + // Validate optional side field + if (item.side !== undefined) { + if ( + typeof item.side !== "string" || + (item.side !== "LEFT" && item.side !== "RIGHT") + ) { + errors.push( + `Line ${i + 1}: create-pull-request-review-comment 'side' must be 'LEFT' or 'RIGHT'` + ); + continue; + } + } + break; + case "create-discussion": + if (!item.title || typeof item.title !== "string") { + errors.push( + `Line ${i + 1}: create-discussion requires a 'title' string field` + ); + continue; + } + if (!item.body || typeof item.body !== "string") { + errors.push( + `Line ${i + 1}: create-discussion requires a 'body' string field` + 
); + continue; + } + // Sanitize text content + item.title = sanitizeContent(item.title); + item.body = sanitizeContent(item.body); + break; + case "missing-tool": + // Validate required tool field + if (!item.tool || typeof item.tool !== "string") { + errors.push( + `Line ${i + 1}: missing-tool requires a 'tool' string field` + ); + continue; + } + // Validate required reason field + if (!item.reason || typeof item.reason !== "string") { + errors.push( + `Line ${i + 1}: missing-tool requires a 'reason' string field` + ); + continue; + } + // Sanitize text content + item.tool = sanitizeContent(item.tool); + item.reason = sanitizeContent(item.reason); + // Validate optional alternatives field + if (item.alternatives !== undefined) { + if (typeof item.alternatives !== "string") { + errors.push( + `Line ${i + 1}: missing-tool 'alternatives' must be a string` + ); + continue; + } + item.alternatives = sanitizeContent(item.alternatives); + } + break; + case "create-code-scanning-alert": + // Validate required fields + if (!item.file || typeof item.file !== "string") { + errors.push( + `Line ${i + 1}: create-code-scanning-alert requires a 'file' field (string)` + ); + continue; + } + if ( + item.line === undefined || + item.line === null || + (typeof item.line !== "number" && typeof item.line !== "string") + ) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert requires a 'line' field (number or string)` + ); + continue; + } + // Additional validation: line must be parseable as a positive integer + const parsedLine = parseInt(item.line, 10); + if (isNaN(parsedLine) || parsedLine <= 0) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'line' must be a valid positive integer (got: ${item.line})` + ); + continue; + } + if (!item.severity || typeof item.severity !== "string") { + errors.push( + `Line ${i + 1}: create-code-scanning-alert requires a 'severity' field (string)` + ); + continue; + } + if (!item.message || typeof item.message !== "string") { 
+ errors.push( + `Line ${i + 1}: create-code-scanning-alert requires a 'message' field (string)` + ); + continue; + } + // Validate severity level + const allowedSeverities = ["error", "warning", "info", "note"]; + if (!allowedSeverities.includes(item.severity.toLowerCase())) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'severity' must be one of: ${allowedSeverities.join(", ")}` + ); + continue; + } + // Validate optional column field + if (item.column !== undefined) { + if ( + typeof item.column !== "number" && + typeof item.column !== "string" + ) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'column' must be a number or string` + ); + continue; + } + // Additional validation: must be parseable as a positive integer + const parsedColumn = parseInt(item.column, 10); + if (isNaN(parsedColumn) || parsedColumn <= 0) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'column' must be a valid positive integer (got: ${item.column})` + ); + continue; + } + } + // Validate optional ruleIdSuffix field + if (item.ruleIdSuffix !== undefined) { + if (typeof item.ruleIdSuffix !== "string") { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'ruleIdSuffix' must be a string` + ); + continue; + } + if (!/^[a-zA-Z0-9_-]+$/.test(item.ruleIdSuffix.trim())) { + errors.push( + `Line ${i + 1}: create-code-scanning-alert 'ruleIdSuffix' must contain only alphanumeric characters, hyphens, and underscores` + ); + continue; + } + } + // Normalize severity to lowercase and sanitize string fields + item.severity = item.severity.toLowerCase(); + item.file = sanitizeContent(item.file); + item.severity = sanitizeContent(item.severity); + item.message = sanitizeContent(item.message); + if (item.ruleIdSuffix) { + item.ruleIdSuffix = sanitizeContent(item.ruleIdSuffix); + } + break; + default: + errors.push(`Line ${i + 1}: Unknown output type '${itemType}'`); + continue; + } + core.info(`Line ${i + 1}: Valid ${itemType} item`); + 
parsedItems.push(item); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + errors.push(`Line ${i + 1}: Invalid JSON - ${errorMsg}`); + } + } + // Report validation results + if (errors.length > 0) { + core.warning("Validation errors found:"); + errors.forEach(error => core.warning(` - ${error}`)); + if (parsedItems.length === 0) { + core.setFailed(errors.map(e => ` - ${e}`).join("\n")); + return; + } + // For now, we'll continue with valid items but log the errors + // In the future, we might want to fail the workflow for invalid items + } + core.info(`Successfully parsed ${parsedItems.length} valid output items`); + // Set the parsed and validated items as output + const validatedOutput = { + items: parsedItems, + errors: errors, + }; + // Store validatedOutput JSON in "agent_output.json" file + const agentOutputFile = "/tmp/agent_output.json"; + const validatedOutputJson = JSON.stringify(validatedOutput); + try { + // Ensure the /tmp directory exists + fs.mkdirSync("/tmp", { recursive: true }); + fs.writeFileSync(agentOutputFile, validatedOutputJson, "utf8"); + core.info(`Stored validated output to: ${agentOutputFile}`); + // Set the environment variable GITHUB_AW_AGENT_OUTPUT to the file path + core.exportVariable("GITHUB_AW_AGENT_OUTPUT", agentOutputFile); + } catch (error) { + const errorMsg = error instanceof Error ? 
error.message : String(error); + core.error(`Failed to write agent output file: ${errorMsg}`); + } + core.setOutput("output", JSON.stringify(validatedOutput)); + core.setOutput("raw_output", outputContent); + } + // Call the main function + await main(); + - name: Print sanitized agent output + run: | + echo "## Processed Output" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '``````json' >> $GITHUB_STEP_SUMMARY + echo '${{ steps.collect_output.outputs.output }}' >> $GITHUB_STEP_SUMMARY + echo '``````' >> $GITHUB_STEP_SUMMARY + - name: Upload sanitized agent output + if: always() && env.GITHUB_AW_AGENT_OUTPUT + uses: actions/upload-artifact@v4 + with: + name: agent_output.json + path: ${{ env.GITHUB_AW_AGENT_OUTPUT }} + if-no-files-found: warn + - name: Upload engine output files + uses: actions/upload-artifact@v4 + with: + name: agent_outputs + path: | + output.txt + if-no-files-found: ignore + - name: Clean up engine output files + run: | + rm -f output.txt + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@v7 + env: + GITHUB_AW_AGENT_OUTPUT: /tmp/test-claude-with-missing-tool-safe-output-and-cache-memory.log + with: + script: | + function main() { + const fs = require("fs"); + try { + // Get the log file path from environment + const logFile = process.env.GITHUB_AW_AGENT_OUTPUT; + if (!logFile) { + core.info("No agent log file specified"); + return; + } + if (!fs.existsSync(logFile)) { + core.info(`Log file not found: ${logFile}`); + return; + } + const logContent = fs.readFileSync(logFile, "utf8"); + const result = parseClaudeLog(logContent); + // Append to GitHub step summary + core.summary.addRaw(result.markdown).write(); + // Check for MCP server failures and fail the job if any occurred + if (result.mcpFailures && result.mcpFailures.length > 0) { + const failedServers = result.mcpFailures.join(", "); + core.setFailed(`MCP server(s) failed to launch: ${failedServers}`); + } + } catch (error) { + const 
errorMessage = error instanceof Error ? error.message : String(error); + core.setFailed(errorMessage); + } + } + /** + * Parses Claude log content and converts it to markdown format + * @param {string} logContent - The raw log content as a string + * @returns {{markdown: string, mcpFailures: string[]}} Result with formatted markdown content and MCP failure list + */ + function parseClaudeLog(logContent) { + try { + const logEntries = JSON.parse(logContent); + if (!Array.isArray(logEntries)) { + return { + markdown: + "## Agent Log Summary\n\nLog format not recognized as Claude JSON array.\n", + mcpFailures: [], + }; + } + let markdown = ""; + const mcpFailures = []; + // Check for initialization data first + const initEntry = logEntries.find( + entry => entry.type === "system" && entry.subtype === "init" + ); + if (initEntry) { + markdown += "## 🚀 Initialization\n\n"; + const initResult = formatInitializationSummary(initEntry); + markdown += initResult.markdown; + mcpFailures.push(...initResult.mcpFailures); + markdown += "\n"; + } + markdown += "## 🤖 Commands and Tools\n\n"; + const toolUsePairs = new Map(); // Map tool_use_id to tool_result + const commandSummary = []; // For the succinct summary + // First pass: collect tool results by tool_use_id + for (const entry of logEntries) { + if (entry.type === "user" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "tool_result" && content.tool_use_id) { + toolUsePairs.set(content.tool_use_id, content); + } + } + } + } + // Collect all tool uses for summary + for (const entry of logEntries) { + if (entry.type === "assistant" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "tool_use") { + const toolName = content.name; + const input = content.input || {}; + // Skip internal tools - only show external commands and API calls + if ( + [ + "Read", + "Write", + "Edit", + "MultiEdit", + "LS", + "Grep", + "Glob", + 
"TodoWrite", + ].includes(toolName) + ) { + continue; // Skip internal file operations and searches + } + // Find the corresponding tool result to get status + const toolResult = toolUsePairs.get(content.id); + let statusIcon = "❓"; + if (toolResult) { + statusIcon = toolResult.is_error === true ? "❌" : "✅"; + } + // Add to command summary (only external tools) + if (toolName === "Bash") { + const formattedCommand = formatBashCommand(input.command || ""); + commandSummary.push(`* ${statusIcon} \`${formattedCommand}\``); + } else if (toolName.startsWith("mcp__")) { + const mcpName = formatMcpName(toolName); + commandSummary.push(`* ${statusIcon} \`${mcpName}(...)\``); + } else { + // Handle other external tools (if any) + commandSummary.push(`* ${statusIcon} ${toolName}`); + } + } + } + } + } + // Add command summary + if (commandSummary.length > 0) { + for (const cmd of commandSummary) { + markdown += `${cmd}\n`; + } + } else { + markdown += "No commands or tools used.\n"; + } + // Add Information section from the last entry with result metadata + markdown += "\n## 📊 Information\n\n"; + // Find the last entry with metadata + const lastEntry = logEntries[logEntries.length - 1]; + if ( + lastEntry && + (lastEntry.num_turns || + lastEntry.duration_ms || + lastEntry.total_cost_usd || + lastEntry.usage) + ) { + if (lastEntry.num_turns) { + markdown += `**Turns:** ${lastEntry.num_turns}\n\n`; + } + if (lastEntry.duration_ms) { + const durationSec = Math.round(lastEntry.duration_ms / 1000); + const minutes = Math.floor(durationSec / 60); + const seconds = durationSec % 60; + markdown += `**Duration:** ${minutes}m ${seconds}s\n\n`; + } + if (lastEntry.total_cost_usd) { + markdown += `**Total Cost:** $${lastEntry.total_cost_usd.toFixed(4)}\n\n`; + } + if (lastEntry.usage) { + const usage = lastEntry.usage; + if (usage.input_tokens || usage.output_tokens) { + markdown += `**Token Usage:**\n`; + if (usage.input_tokens) + markdown += `- Input: 
${usage.input_tokens.toLocaleString()}\n`; + if (usage.cache_creation_input_tokens) + markdown += `- Cache Creation: ${usage.cache_creation_input_tokens.toLocaleString()}\n`; + if (usage.cache_read_input_tokens) + markdown += `- Cache Read: ${usage.cache_read_input_tokens.toLocaleString()}\n`; + if (usage.output_tokens) + markdown += `- Output: ${usage.output_tokens.toLocaleString()}\n`; + markdown += "\n"; + } + } + if ( + lastEntry.permission_denials && + lastEntry.permission_denials.length > 0 + ) { + markdown += `**Permission Denials:** ${lastEntry.permission_denials.length}\n\n`; + } + } + markdown += "\n## 🤖 Reasoning\n\n"; + // Second pass: process assistant messages in sequence + for (const entry of logEntries) { + if (entry.type === "assistant" && entry.message?.content) { + for (const content of entry.message.content) { + if (content.type === "text" && content.text) { + // Add reasoning text directly (no header) + const text = content.text.trim(); + if (text && text.length > 0) { + markdown += text + "\n\n"; + } + } else if (content.type === "tool_use") { + // Process tool use with its result + const toolResult = toolUsePairs.get(content.id); + const toolMarkdown = formatToolUse(content, toolResult); + if (toolMarkdown) { + markdown += toolMarkdown; + } + } + } + } + } + return { markdown, mcpFailures }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + return { + markdown: `## Agent Log Summary\n\nError parsing Claude log: ${errorMessage}\n`, + mcpFailures: [], + }; + } + } + /** + * Formats initialization information from system init entry + * @param {any} initEntry - The system init entry containing tools, mcp_servers, etc. 
+ * @returns {{markdown: string, mcpFailures: string[]}} Result with formatted markdown string and MCP failure list + */ + function formatInitializationSummary(initEntry) { + let markdown = ""; + const mcpFailures = []; + // Display model and session info + if (initEntry.model) { + markdown += `**Model:** ${initEntry.model}\n\n`; + } + if (initEntry.session_id) { + markdown += `**Session ID:** ${initEntry.session_id}\n\n`; + } + if (initEntry.cwd) { + // Show a cleaner path by removing common prefixes + const cleanCwd = initEntry.cwd.replace( + /^\/home\/runner\/work\/[^\/]+\/[^\/]+/, + "." + ); + markdown += `**Working Directory:** ${cleanCwd}\n\n`; + } + // Display MCP servers status + if (initEntry.mcp_servers && Array.isArray(initEntry.mcp_servers)) { + markdown += "**MCP Servers:**\n"; + for (const server of initEntry.mcp_servers) { + const statusIcon = + server.status === "connected" + ? "✅" + : server.status === "failed" + ? "❌" + : "❓"; + markdown += `- ${statusIcon} ${server.name} (${server.status})\n`; + // Track failed MCP servers + if (server.status === "failed") { + mcpFailures.push(server.name); + } + } + markdown += "\n"; + } + // Display tools by category + if (initEntry.tools && Array.isArray(initEntry.tools)) { + markdown += "**Available Tools:**\n"; + // Categorize tools + /** @type {{ [key: string]: string[] }} */ + const categories = { + Core: [], + "File Operations": [], + "Git/GitHub": [], + MCP: [], + Other: [], + }; + for (const tool of initEntry.tools) { + if ( + ["Task", "Bash", "BashOutput", "KillBash", "ExitPlanMode"].includes( + tool + ) + ) { + categories["Core"].push(tool); + } else if ( + [ + "Read", + "Edit", + "MultiEdit", + "Write", + "LS", + "Grep", + "Glob", + "NotebookEdit", + ].includes(tool) + ) { + categories["File Operations"].push(tool); + } else if (tool.startsWith("mcp__github__")) { + categories["Git/GitHub"].push(formatMcpName(tool)); + } else if ( + tool.startsWith("mcp__") || + ["ListMcpResourcesTool", 
"ReadMcpResourceTool"].includes(tool) + ) { + categories["MCP"].push( + tool.startsWith("mcp__") ? formatMcpName(tool) : tool + ); + } else { + categories["Other"].push(tool); + } + } + // Display categories with tools + for (const [category, tools] of Object.entries(categories)) { + if (tools.length > 0) { + markdown += `- **${category}:** ${tools.length} tools\n`; + if (tools.length <= 5) { + // Show all tools if 5 or fewer + markdown += ` - ${tools.join(", ")}\n`; + } else { + // Show first few and count + markdown += ` - ${tools.slice(0, 3).join(", ")}, and ${tools.length - 3} more\n`; + } + } + } + markdown += "\n"; + } + // Display slash commands if available + if (initEntry.slash_commands && Array.isArray(initEntry.slash_commands)) { + const commandCount = initEntry.slash_commands.length; + markdown += `**Slash Commands:** ${commandCount} available\n`; + if (commandCount <= 10) { + markdown += `- ${initEntry.slash_commands.join(", ")}\n`; + } else { + markdown += `- ${initEntry.slash_commands.slice(0, 5).join(", ")}, and ${commandCount - 5} more\n`; + } + markdown += "\n"; + } + return { markdown, mcpFailures }; + } + /** + * Formats a tool use entry with its result into markdown + * @param {any} toolUse - The tool use object containing name, input, etc. + * @param {any} toolResult - The corresponding tool result object + * @returns {string} Formatted markdown string + */ + function formatToolUse(toolUse, toolResult) { + const toolName = toolUse.name; + const input = toolUse.input || {}; + // Skip TodoWrite except the very last one (we'll handle this separately) + if (toolName === "TodoWrite") { + return ""; // Skip for now, would need global context to find the last one + } + // Helper function to determine status icon + function getStatusIcon() { + if (toolResult) { + return toolResult.is_error === true ? 
"❌" : "✅"; + } + return "❓"; // Unknown by default + } + let markdown = ""; + const statusIcon = getStatusIcon(); + switch (toolName) { + case "Bash": + const command = input.command || ""; + const description = input.description || ""; + // Format the command to be single line + const formattedCommand = formatBashCommand(command); + if (description) { + markdown += `${description}:\n\n`; + } + markdown += `${statusIcon} \`${formattedCommand}\`\n\n`; + break; + case "Read": + const filePath = input.file_path || input.path || ""; + const relativePath = filePath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); // Remove /home/runner/work/repo/repo/ prefix + markdown += `${statusIcon} Read \`${relativePath}\`\n\n`; + break; + case "Write": + case "Edit": + case "MultiEdit": + const writeFilePath = input.file_path || input.path || ""; + const writeRelativePath = writeFilePath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); + markdown += `${statusIcon} Write \`${writeRelativePath}\`\n\n`; + break; + case "Grep": + case "Glob": + const query = input.query || input.pattern || ""; + markdown += `${statusIcon} Search for \`${truncateString(query, 80)}\`\n\n`; + break; + case "LS": + const lsPath = input.path || ""; + const lsRelativePath = lsPath.replace( + /^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\//, + "" + ); + markdown += `${statusIcon} LS: ${lsRelativePath || lsPath}\n\n`; + break; + default: + // Handle MCP calls and other tools + if (toolName.startsWith("mcp__")) { + const mcpName = formatMcpName(toolName); + const params = formatMcpParameters(input); + markdown += `${statusIcon} ${mcpName}(${params})\n\n`; + } else { + // Generic tool formatting - show the tool name and main parameters + const keys = Object.keys(input); + if (keys.length > 0) { + // Try to find the most important parameter + const mainParam = + keys.find(k => + ["query", "command", "path", "file_path", "content"].includes(k) + ) || keys[0]; + const value = String(input[mainParam] || 
""); + if (value) { + markdown += `${statusIcon} ${toolName}: ${truncateString(value, 100)}\n\n`; + } else { + markdown += `${statusIcon} ${toolName}\n\n`; + } + } else { + markdown += `${statusIcon} ${toolName}\n\n`; + } + } + } + return markdown; + } + /** + * Formats MCP tool name from internal format to display format + * @param {string} toolName - The raw tool name (e.g., mcp__github__search_issues) + * @returns {string} Formatted tool name (e.g., github::search_issues) + */ + function formatMcpName(toolName) { + // Convert mcp__github__search_issues to github::search_issues + if (toolName.startsWith("mcp__")) { + const parts = toolName.split("__"); + if (parts.length >= 3) { + const provider = parts[1]; // github, etc. + const method = parts.slice(2).join("_"); // search_issues, etc. + return `${provider}::${method}`; + } + } + return toolName; + } + /** + * Formats MCP parameters into a human-readable string + * @param {Record} input - The input object containing parameters + * @returns {string} Formatted parameters string + */ + function formatMcpParameters(input) { + const keys = Object.keys(input); + if (keys.length === 0) return ""; + const paramStrs = []; + for (const key of keys.slice(0, 4)) { + // Show up to 4 parameters + const value = String(input[key] || ""); + paramStrs.push(`${key}: ${truncateString(value, 40)}`); + } + if (keys.length > 4) { + paramStrs.push("..."); + } + return paramStrs.join(", "); + } + /** + * Formats a bash command by normalizing whitespace and escaping + * @param {string} command - The raw bash command string + * @returns {string} Formatted and escaped command string + */ + function formatBashCommand(command) { + if (!command) return ""; + // Convert multi-line commands to single line by replacing newlines with spaces + // and collapsing multiple spaces + let formatted = command + .replace(/\n/g, " ") // Replace newlines with spaces + .replace(/\r/g, " ") // Replace carriage returns with spaces + .replace(/\t/g, " ") // 
Replace tabs with spaces + .replace(/\s+/g, " ") // Collapse multiple spaces into one + .trim(); // Remove leading/trailing whitespace + // Escape backticks to prevent markdown issues + formatted = formatted.replace(/`/g, "\\`"); + // Truncate if too long (keep reasonable length for summary) + const maxLength = 80; + if (formatted.length > maxLength) { + formatted = formatted.substring(0, maxLength) + "..."; + } + return formatted; + } + /** + * Truncates a string to a maximum length with ellipsis + * @param {string} str - The string to truncate + * @param {number} maxLength - Maximum allowed length + * @returns {string} Truncated string with ellipsis if needed + */ + function truncateString(str, maxLength) { + if (!str) return ""; + if (str.length <= maxLength) return str; + return str.substring(0, maxLength) + "..."; + } + // Export for testing + if (typeof module !== "undefined" && module.exports) { + module.exports = { + parseClaudeLog, + formatToolUse, + formatInitializationSummary, + formatBashCommand, + truncateString, + }; + } + main(); + - name: Upload agent logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-claude-with-missing-tool-safe-output-and-cache-memory.log + path: /tmp/test-claude-with-missing-tool-safe-output-and-cache-memory.log + if-no-files-found: warn + + missing_tool: + needs: test-claude-with-missing-tool-safe-output-and-cache-memory + if: ${{ always() }} + runs-on: ubuntu-latest + permissions: + contents: read + timeout-minutes: 5 + outputs: + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@v7 + env: + GITHUB_AW_AGENT_OUTPUT: ${{ needs.test-claude-with-missing-tool-safe-output-and-cache-memory.outputs.output }} + GITHUB_AW_MISSING_TOOL_MAX: 5 + with: + script: | + async function main() { + const fs = require("fs"); + // Get environment variables + 
const agentOutput = process.env.GITHUB_AW_AGENT_OUTPUT || ""; + const maxReports = process.env.GITHUB_AW_MISSING_TOOL_MAX + ? parseInt(process.env.GITHUB_AW_MISSING_TOOL_MAX) + : null; + core.info("Processing missing-tool reports..."); + core.info(`Agent output length: ${agentOutput.length}`); + if (maxReports) { + core.info(`Maximum reports allowed: ${maxReports}`); + } + /** @type {any[]} */ + const missingTools = []; + // Return early if no agent output + if (!agentOutput.trim()) { + core.info("No agent output to process"); + core.setOutput("tools_reported", JSON.stringify(missingTools)); + core.setOutput("total_count", missingTools.length.toString()); + return; + } + // Parse the validated output JSON + let validatedOutput; + try { + validatedOutput = JSON.parse(agentOutput); + } catch (error) { + core.setFailed( + `Error parsing agent output JSON: ${error instanceof Error ? error.message : String(error)}` + ); + return; + } + if (!validatedOutput.items || !Array.isArray(validatedOutput.items)) { + core.info("No valid items found in agent output"); + core.setOutput("tools_reported", JSON.stringify(missingTools)); + core.setOutput("total_count", missingTools.length.toString()); + return; + } + core.info(`Parsed agent output with ${validatedOutput.items.length} entries`); + // Process all parsed entries + for (const entry of validatedOutput.items) { + if (entry.type === "missing-tool") { + // Validate required fields + if (!entry.tool) { + core.warning( + `missing-tool entry missing 'tool' field: ${JSON.stringify(entry)}` + ); + continue; + } + if (!entry.reason) { + core.warning( + `missing-tool entry missing 'reason' field: ${JSON.stringify(entry)}` + ); + continue; + } + const missingTool = { + tool: entry.tool, + reason: entry.reason, + alternatives: entry.alternatives || null, + timestamp: new Date().toISOString(), + }; + missingTools.push(missingTool); + core.info(`Recorded missing tool: ${missingTool.tool}`); + // Check max limit + if (maxReports && 
missingTools.length >= maxReports) { + core.info( + `Reached maximum number of missing tool reports (${maxReports})` + ); + break; + } + } + } + core.info(`Total missing tools reported: ${missingTools.length}`); + // Output results + core.setOutput("tools_reported", JSON.stringify(missingTools)); + core.setOutput("total_count", missingTools.length.toString()); + // Log details for debugging + if (missingTools.length > 0) { + core.info("Missing tools summary:"); + missingTools.forEach((tool, index) => { + core.info(`${index + 1}. Tool: ${tool.tool}`); + core.info(` Reason: ${tool.reason}`); + if (tool.alternatives) { + core.info(` Alternatives: ${tool.alternatives}`); + } + core.info(` Reported at: ${tool.timestamp}`); + core.info(""); + }); + } else { + core.info("No missing tools reported in this workflow execution."); + } + } + main().catch(error => { + core.error(`Error processing missing-tool reports: ${error}`); + core.setFailed(`Error processing missing-tool reports: ${error}`); + }); + diff --git a/pkg/cli/workflows/test-claude-missing-tool.md b/pkg/cli/workflows/test-claude-missing-tool.md new file mode 100644 index 00000000000..98a872e3671 --- /dev/null +++ b/pkg/cli/workflows/test-claude-missing-tool.md @@ -0,0 +1,61 @@ +--- +engine: claude +on: + workflow_dispatch: + inputs: + missing_tool: + description: 'Tool to report as missing' + required: true + default: 'example-missing-tool' + +tools: + cache-memory: true + github: + allowed: [get_repository] + +safe-outputs: + missing-tool: + max: 5 + staged: true + +timeout_minutes: 5 +--- + +# Test Claude with Missing Tool Safe Output and Cache Memory + +You are a test agent that demonstrates the missing-tool safe output functionality with Claude engine, enhanced with persistent memory. + +## Task + +Your job is to: + +1. **Check your memory** for any previous missing tool reports +2. **Report a missing tool** using the safe output functionality +3. **Store the report in memory** for future reference +4. 
**Use GitHub tools** to get basic repository information + +## Instructions + +1. First, check your memory to see if you've reported any missing tools before +2. Report that the tool specified in the input (${{ github.event.inputs.missing_tool }}) is missing +3. Use the safe output functionality to properly report the missing tool with: + - Tool name: ${{ github.event.inputs.missing_tool }} + - Reason: "This tool is not available in the current workflow environment" + - Alternatives: "Consider using alternative tools or requesting tool addition" + - Context: "Testing missing-tool safe output with Claude engine" +4. Store this report in your memory for future reference +5. Get basic information about this repository using the GitHub tool +6. Provide a summary of your actions + +## Expected Behavior + +- **Memory persistence**: Should remember previous missing tool reports across runs +- **Safe output**: Should generate proper missing-tool JSON output +- **Tool integration**: Should work with GitHub tools despite the "missing" tool +- **Staged mode**: Should prevent actual GitHub interactions when staged: true + +This workflow tests that: +- Claude can properly report missing tools using safe outputs +- Cache-memory preserves missing tool reports across runs +- Integration works with existing tools like GitHub +- Safe output system handles missing tool scenarios correctly \ No newline at end of file diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index 90186fa2cf9..c07f5cd785a 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -1113,6 +1113,36 @@ "additionalProperties": false } ] + }, + "cache-memory": { + "description": "Cache memory MCP configuration for persistent memory storage", + "oneOf": [ + { + "type": "boolean", + "description": "Enable cache-memory with default settings" + }, + { + "type": "object", + "description": "Cache-memory 
configuration object", + "properties": { + "key": { + "type": "string", + "description": "Custom cache key for memory MCP data (restore keys are auto-generated by splitting on '-')" + }, + "docker-image": { + "type": "string", + "description": "Docker image to use for the memory MCP server (default: mcp/memory)" + }, + "retention-days": { + "type": "integer", + "minimum": 1, + "maximum": 90, + "description": "Number of days to retain uploaded artifacts (1-90 days, default: repository setting)" + } + }, + "additionalProperties": false + } + ] } }, "additionalProperties": { diff --git a/pkg/workflow/cache.go b/pkg/workflow/cache.go index 145d34871ec..2c9bc203205 100644 --- a/pkg/workflow/cache.go +++ b/pkg/workflow/cache.go @@ -63,7 +63,7 @@ func generateCacheSteps(builder *strings.Builder, data *WorkflowData, verbose bo } fmt.Fprintf(builder, " - name: %s\n", stepName) - builder.WriteString(" uses: actions/cache@v3\n") + builder.WriteString(" uses: actions/cache@v4\n") builder.WriteString(" with:\n") // Add required cache parameters @@ -103,3 +103,58 @@ func generateCacheSteps(builder *strings.Builder, data *WorkflowData, verbose bo } } } + +// generateCacheMemorySteps generates cache steps for the cache-memory configuration +func generateCacheMemorySteps(builder *strings.Builder, data *WorkflowData, verbose bool) { + if data.CacheMemoryConfig == nil || !data.CacheMemoryConfig.Enabled { + return + } + + // Add comment indicating cache-memory configuration was processed + builder.WriteString(" # Cache memory MCP configuration from frontmatter processed below\n") + + // Add step to create cache-memory directory + builder.WriteString(" - name: Create cache-memory directory\n") + builder.WriteString(" run: mkdir -p /tmp/cache-memory\n") + + // Use the parsed configuration + cacheKey := data.CacheMemoryConfig.Key + if cacheKey == "" { + cacheKey = "memory-${{ github.workflow }}-${{ github.run_id }}" + } + + // Automatically append -${{ github.run_id }} if the key doesn't 
already end with it + runIdSuffix := "-${{ github.run_id }}" + if !strings.HasSuffix(cacheKey, runIdSuffix) { + cacheKey = cacheKey + runIdSuffix + } + + // Generate restore keys automatically by splitting the cache key on '-' + // This creates a progressive fallback hierarchy + var restoreKeys []string + keyParts := strings.Split(cacheKey, "-") + for i := len(keyParts) - 1; i > 0; i-- { + restoreKey := strings.Join(keyParts[:i], "-") + "-" + restoreKeys = append(restoreKeys, restoreKey) + } + + builder.WriteString(" - name: Cache memory MCP data\n") + builder.WriteString(" uses: actions/cache@v4\n") + builder.WriteString(" with:\n") + fmt.Fprintf(builder, " key: %s\n", cacheKey) + builder.WriteString(" path: /tmp/cache-memory\n") + builder.WriteString(" restore-keys: |\n") + for _, key := range restoreKeys { + fmt.Fprintf(builder, " %s\n", key) + } + + // Add upload-artifact step if retention-days is configured + if data.CacheMemoryConfig.RetentionDays != nil { + builder.WriteString(" - name: Upload memory MCP data as artifact\n") + builder.WriteString(" uses: actions/upload-artifact@v4\n") + builder.WriteString(" with:\n") + builder.WriteString(" name: cache-memory-data\n") + builder.WriteString(" path: /tmp/cache-memory\n") + fmt.Fprintf(builder, " retention-days: %d\n", *data.CacheMemoryConfig.RetentionDays) + } +} diff --git a/pkg/workflow/claude_engine.go b/pkg/workflow/claude_engine.go index e945fbc4722..5384008c4cf 100644 --- a/pkg/workflow/claude_engine.go +++ b/pkg/workflow/claude_engine.go @@ -452,6 +452,14 @@ func (e *ClaudeEngine) computeAllowedClaudeToolsString(tools map[string]any, saf // Skip the claude section as we've already processed it continue } else { + // Handle cache-memory as a special case first (can be boolean or map) + if toolName == "cache-memory" { + // For cache-memory, it's configured as MCP server "memory" and has no allowed restrictions + // Default to wildcard access since cache-memory doesn't specify allowed tools + allowedTools 
= append(allowedTools, "mcp__memory") + continue + } + // Check if this is an MCP tool (has MCP-compatible type) or standard MCP tool (github) if mcpConfig, ok := toolValue.(map[string]any); ok { // Check if it's explicitly marked as MCP type @@ -548,6 +556,8 @@ func (e *ClaudeEngine) RenderMCPConfig(yaml *strings.Builder, tools map[string]a case "playwright": playwrightTool := tools["playwright"] e.renderPlaywrightMCPConfig(yaml, playwrightTool, isLast, workflowData.NetworkPermissions) + case "cache-memory": + e.renderCacheMemoryMCPConfig(yaml, isLast, workflowData) default: // Handle custom MCP tools (those with MCP-compatible type) if toolConfig, ok := tools[toolName].(map[string]any); ok { @@ -654,6 +664,25 @@ func (e *ClaudeEngine) renderClaudeMCPConfig(yaml *strings.Builder, toolName str return nil } +// renderCacheMemoryMCPConfig generates the Memory MCP server configuration +// Uses npx-based @modelcontextprotocol/server-memory setup +func (e *ClaudeEngine) renderCacheMemoryMCPConfig(yaml *strings.Builder, isLast bool, workflowData *WorkflowData) { + yaml.WriteString(" \"memory\": {\n") + yaml.WriteString(" \"command\": \"npx\",\n") + yaml.WriteString(" \"args\": [\n") + yaml.WriteString(" \"@modelcontextprotocol/server-memory\"\n") + yaml.WriteString(" ],\n") + yaml.WriteString(" \"env\": {\n") + yaml.WriteString(" \"MEMORY_FILE_PATH\": \"/tmp/cache-memory/memory.json\"\n") + yaml.WriteString(" }\n") + + if isLast { + yaml.WriteString(" }\n") + } else { + yaml.WriteString(" },\n") + } +} + // ParseLogMetrics implements engine-specific log parsing for Claude func (e *ClaudeEngine) ParseLogMetrics(logContent string, verbose bool) LogMetrics { var metrics LogMetrics diff --git a/pkg/workflow/claude_engine_tools_test.go b/pkg/workflow/claude_engine_tools_test.go index 1e65c711bc9..c756ef290b3 100644 --- a/pkg/workflow/claude_engine_tools_test.go +++ b/pkg/workflow/claude_engine_tools_test.go @@ -49,6 +49,32 @@ func TestClaudeEngineComputeAllowedTools(t 
*testing.T) { }, expected: "ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,mcp__github__create_issue,mcp__github__list_issues", }, + { + name: "cache-memory tool (should get wildcard access as mcp__memory)", + tools: map[string]any{ + "cache-memory": map[string]any{ + "key": "test-memory-key", + }, + }, + expected: "ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,mcp__memory", + }, + { + name: "cache-memory with boolean true", + tools: map[string]any{ + "cache-memory": true, + }, + expected: "ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,mcp__memory", + }, + { + name: "cache-memory with github tools", + tools: map[string]any{ + "cache-memory": true, + "github": map[string]any{ + "allowed": []any{"get_repository"}, + }, + }, + expected: "ExitPlanMode,Glob,Grep,LS,NotebookRead,Read,Task,TodoWrite,mcp__github__get_repository,mcp__memory", + }, { name: "mixed neutral and mcp tools", tools: map[string]any{ diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index b663562e7e1..3714bd6357a 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -149,6 +149,15 @@ type WorkflowData struct { NetworkPermissions *NetworkPermissions // parsed network permissions SafeOutputs *SafeOutputsConfig // output configuration for automatic output routes Roles []string // permission levels required to trigger workflow + CacheMemoryConfig *CacheMemoryConfig // parsed cache-memory configuration +} + +// CacheMemoryConfig holds configuration for cache-memory functionality +type CacheMemoryConfig struct { + Enabled bool `yaml:"enabled,omitempty"` // whether cache-memory is enabled + Key string `yaml:"key,omitempty"` // custom cache key + DockerImage string `yaml:"docker-image,omitempty"` // deprecated: no longer used (npx is used instead) + RetentionDays *int `yaml:"retention-days,omitempty"` // retention days for upload-artifact action } // SafeOutputsConfig holds configuration for automatic output routes @@ -648,6 +657,7 @@ 
func (c *Compiler) parseWorkflowFile(markdownPath string) (*WorkflowData, error)
 	workflowData.PostSteps = c.extractTopLevelYAMLSection(result.Frontmatter, "post-steps")
 	workflowData.RunsOn = c.extractTopLevelYAMLSection(result.Frontmatter, "runs-on")
 	workflowData.Cache = c.extractTopLevelYAMLSection(result.Frontmatter, "cache")
+	workflowData.CacheMemoryConfig = c.extractCacheMemoryConfig(topTools)
 
 	// Process stop-after configuration from the on: section
 	err = c.processStopAfterConfiguration(result.Frontmatter, workflowData)
@@ -2801,7 +2811,7 @@ func (c *Compiler) generateMCPSetup(yaml *strings.Builder, tools map[string]any,
 	for toolName, toolValue := range workflowTools {
 		// Standard MCP tools
-		if toolName == "github" || toolName == "playwright" {
+		if toolName == "github" || toolName == "playwright" || toolName == "cache-memory" {
 			mcpTools = append(mcpTools, toolName)
 		} else if mcpConfig, ok := toolValue.(map[string]any); ok {
 			// Check if it's explicitly marked as MCP type in the new format
@@ -2990,6 +3000,9 @@ func (c *Compiler) generateMainJobSteps(yaml *strings.Builder, data *WorkflowDat
 	// Add cache steps if cache configuration is present
 	generateCacheSteps(yaml, data, c.verbose)
 
+	// Add cache-memory steps if cache-memory configuration is present
+	generateCacheMemorySteps(yaml, data, c.verbose)
+
 	// Configure git credentials if git operations will be needed
 	if needsGitCommands(data.SafeOutputs) {
 		c.generateGitConfiguration(yaml, data)
@@ -3712,6 +3725,71 @@ func (c *Compiler) extractSafeOutputsConfig(frontmatter map[string]any) *SafeOut
 	return config
 }
 
+// extractCacheMemoryConfig parses the "cache-memory" entry of the tools
+// section. The entry may be a plain boolean (enable with defaults) or a map
+// with optional "key", "docker-image" (honored by the custom engine's Docker
+// config only) and "retention-days" fields. Returns nil when the tool is
+// absent or its value has an unsupported type.
+func (c *Compiler) extractCacheMemoryConfig(tools map[string]any) *CacheMemoryConfig {
+	cacheMemoryValue, exists := tools["cache-memory"]
+	if !exists {
+		return nil
+	}
+
+	// Default cache key: scoped per run so each run restores the most recent
+	// snapshot (via restore-keys) but saves under its own unique key.
+	const defaultKey = "memory-${{ github.workflow }}-${{ github.run_id }}"
+
+	config := &CacheMemoryConfig{}
+
+	// Boolean form: simple enable/disable with defaults.
+	if boolValue, ok := cacheMemoryValue.(bool); ok {
+		config.Enabled = boolValue
+		if config.Enabled {
+			config.Key = defaultKey
+		}
+		return config
+	}
+
+	// Map form: the presence of a configuration map implies enabled.
+	if configMap, ok := cacheMemoryValue.(map[string]any); ok {
+		config.Enabled = true
+		config.Key = defaultKey
+
+		// Custom cache key. A "-${{ github.run_id }}" suffix is appended when
+		// missing so every run still saves a distinct cache entry.
+		if key, exists := configMap["key"]; exists {
+			if keyStr, ok := key.(string); ok {
+				config.Key = keyStr
+				runIdSuffix := "-${{ github.run_id }}"
+				if !strings.HasSuffix(config.Key, runIdSuffix) {
+					config.Key += runIdSuffix
+				}
+			}
+		}
+
+		// Custom docker image; ignored by the npx-based Claude engine but
+		// used by the custom engine's Docker-based MCP config.
+		if dockerImage, exists := configMap["docker-image"]; exists {
+			if dockerImageStr, ok := dockerImage.(string); ok {
+				config.DockerImage = dockerImageStr
+			}
+		}
+
+		// Retention days for the upload-artifact action. YAML/JSON decoders
+		// surface integers as int, int64, uint64 or float64 depending on the
+		// parser, so accept all four rather than silently dropping the value.
+		if retentionDays, exists := configMap["retention-days"]; exists {
+			switch v := retentionDays.(type) {
+			case int:
+				config.RetentionDays = &v
+			case int64:
+				d := int(v)
+				config.RetentionDays = &d
+			case uint64:
+				d := int(v)
+				config.RetentionDays = &d
+			case float64:
+				d := int(v)
+				config.RetentionDays = &d
+			}
+		}
+
+		return config
+	}
+
+	return nil
+}
+
 // parseIssuesConfig handles create-issue configuration
 func (c *Compiler) parseIssuesConfig(outputMap map[string]any) *CreateIssuesConfig {
 	if configData, exists := outputMap["create-issue"]; exists {
diff --git a/pkg/workflow/compiler_test.go b/pkg/workflow/compiler_test.go
index 94632f8594b..32fae70ed68 100644
--- a/pkg/workflow/compiler_test.go
+++ b/pkg/workflow/compiler_test.go
@@ -3849,7 +3849,7 @@ tools:
"# Cache configuration from frontmatter was processed and added to the main job steps",
 				"# Cache configuration from frontmatter processed below",
 				"- name: Cache",
-				"uses: actions/cache@v3",
+				"uses: actions/cache@v4",
 				"key: node-modules-${{ hashFiles('package-lock.json') }}",
 				"path: node_modules",
 				"restore-keys: node-modules-",
@@ -3888,7 +3888,7 @@ tools:
 				"# Cache configuration from frontmatter processed below",
 				"- name: Cache (node-modules-${{ hashFiles('package-lock.json') }})",
 				"- name: Cache (build-cache-${{ github.sha }})",
-				"uses: actions/cache@v3",
+				"uses: actions/cache@v4",
 				"key: node-modules-${{ hashFiles('package-lock.json') }}",
 				"key: build-cache-${{ github.sha }}",
 				"path: node_modules",
@@ -3926,7 +3926,7 @@ tools:
 			expectedInLock: []string{
 				"# Cache configuration from frontmatter processed below",
 				"- name: Cache",
-				"uses: actions/cache@v3",
+				"uses: actions/cache@v4",
 				"key: full-cache-${{ github.sha }}",
 				"path: dist",
 				"restore-keys: |",
diff --git a/pkg/workflow/custom_engine.go b/pkg/workflow/custom_engine.go
index 2d0aa8a9e28..210da7f4bed 100644
--- a/pkg/workflow/custom_engine.go
+++ b/pkg/workflow/custom_engine.go
@@ -143,6 +143,8 @@ func (e *CustomEngine) RenderMCPConfig(yaml *strings.Builder, tools map[string]a
 	case "playwright":
 		playwrightTool := tools["playwright"]
 		e.renderPlaywrightMCPConfig(yaml, playwrightTool, isLast, workflowData.NetworkPermissions)
+	case "cache-memory":
+		e.renderCacheMemoryMCPConfig(yaml, isLast, workflowData)
 	default:
 		// Handle custom MCP tools (those with MCP-compatible type)
 		if toolConfig, ok := tools[toolName].(map[string]any); ok {
@@ -248,6 +250,36 @@ func (e *CustomEngine) renderCustomMCPConfig(yaml *strings.Builder, toolName str
 	return nil
 }
 
+// renderCacheMemoryMCPConfig emits the JSON entry for the "memory" MCP server
+// into the generated config. Unlike the Claude engine (which launches the
+// server via npx), the custom engine runs a Docker image and bind-mounts the
+// cache-backed /tmp/cache-memory directory as the server's persistence
+// location.
+func (e *CustomEngine) renderCacheMemoryMCPConfig(yaml *strings.Builder, isLast bool, workflowData *WorkflowData) {
+	// Determine Docker image to use: "docker-image" from the cache-memory
+	// tool config overrides the default.
+	dockerImage := "mcp/memory" // default from official documentation
+	if workflowData.CacheMemoryConfig != nil && workflowData.CacheMemoryConfig.DockerImage != "" {
+		dockerImage = workflowData.CacheMemoryConfig.DockerImage
+	}
+
+	yaml.WriteString("      \"memory\": {\n")
+	yaml.WriteString("        \"command\": \"docker\",\n")
+	yaml.WriteString("        \"args\": [\n")
+	yaml.WriteString("          \"run\",\n")
+	yaml.WriteString("          \"-i\",\n")
+	yaml.WriteString("          \"--rm\",\n")
+	yaml.WriteString("          \"-v\",\n")
+	// Host cache dir mounted at /app/dist, where the server keeps
+	// memory.json — matches the MEMORY_FILE_PATH below.
+	yaml.WriteString("          \"/tmp/cache-memory:/app/dist\",\n")
+	fmt.Fprintf(yaml, "          \"%s\"\n", dockerImage)
+	yaml.WriteString("        ],\n")
+	yaml.WriteString("        \"env\": {\n")
+	yaml.WriteString("          \"MEMORY_FILE_PATH\": \"/app/dist/memory.json\"\n")
+	yaml.WriteString("        }\n")
+
+	// Trailing comma only when more MCP server entries follow this one.
+	if isLast {
+		yaml.WriteString("      }\n")
+	} else {
+		yaml.WriteString("      },\n")
+	}
+}
+
 // ParseLogMetrics implements basic log parsing for custom engine
 // For custom engines, try both Claude and Codex parsing approaches to extract turn information
 func (e *CustomEngine) ParseLogMetrics(logContent string, verbose bool) LogMetrics {