From 6e99bd60e34fa36d289c3484180d797ff8e9ea34 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 18 Jun 2025 20:05:24 -0600 Subject: [PATCH 01/16] fix full doc sync --- .github/workflows/sync-docs-full.yml | 47 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 906e8999..d3cb54f2 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -18,27 +18,34 @@ jobs: echo "Found files:" cat mdx_files.txt - # Create the changed array by processing each file through jq + # Create JSON array of changed files echo "Processing files..." - jq -n --slurpfile paths <( - while IFS= read -r path; do - [ -z "$path" ] && continue - if [ -f "content/$path" ]; then - echo "Processing: content/$path" - jq -n \ - --arg path "$path" \ - --arg content "$(base64 -w0 < "content/$path")" \ - '{path: $path, content: $content}' - fi - done < mdx_files.txt | jq -s '.' - ) \ - --slurpfile removed <(cat mdx_files.txt | jq -R . | jq -s .) \ - --arg repo "$GITHUB_REPOSITORY" \ - '{ - repo: $repo, - changed: ($paths | .[0] // []), - removed: ($removed | .[0] // []) - }' > payload.json + changed_files="[]" + while IFS= read -r path; do + [ -z "$path" ] && continue + if [ -f "content/$path" ]; then + echo "Processing: content/$path" >&2 + file_json=$(jq -n \ + --arg path "$path" \ + --arg content "$(base64 -w0 < "content/$path")" \ + '{path: $path, content: $content}') + changed_files=$(echo "$changed_files" | jq --argjson file "$file_json" '. + [$file]') + fi + done < mdx_files.txt + + # Create removed files array (same as changed for full sync) + removed_files=$(cat mdx_files.txt | jq -R . | jq -s .) + + # Create final payload + jq -n \ + --argjson changed "$changed_files" \ + --argjson removed "$removed_files" \ + --arg repo "$GITHUB_REPOSITORY" \ + '{ + repo: $repo, + changed: $changed, + removed: $removed + }' > payload.json # Show debug info echo "Payload structure (without contents):" From 73f56dde7af2d418aeacc9d624ecf0a474ac88c6 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 18 Jun 2025 20:06:25 -0600 Subject: [PATCH 02/16] fix file read --- .github/workflows/sync-docs-full.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index d3cb54f2..f52201d4 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -1,7 +1,7 @@ name: Full Docs Sync to Vector Store on: - workflow_dispatch: + push: jobs: sync: From ef7bfb6667df11b9c7d16727fd6a6d06576dfc7a Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 18 Jun 2025 20:07:55 -0600 Subject: [PATCH 03/16] store info in payload file --- .github/workflows/sync-docs-full.yml | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index f52201d4..4e1c04ec 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -20,33 +20,41 @@ jobs: # Create JSON array of changed files echo "Processing files..." - changed_files="[]" + rm -f changed_files.json + echo "[]" > changed_files.json + while IFS= read -r path; do [ -z "$path" ] && continue if [ -f "content/$path" ]; then echo "Processing: content/$path" >&2 - file_json=$(jq -n \ + # Create individual file JSON and append to array + jq -n \ --arg path "$path" \ --arg content "$(base64 -w0 < "content/$path")" \ - '{path: $path, content: $content}') - changed_files=$(echo "$changed_files" | jq --argjson file "$file_json" '. + [$file]') + '{path: $path, content: $content}' > temp_file.json + # Append to the array using jq + jq '. + [input]' changed_files.json temp_file.json > temp_changed.json + mv temp_changed.json changed_files.json fi done < mdx_files.txt # Create removed files array (same as changed for full sync) - removed_files=$(cat mdx_files.txt | jq -R . | jq -s .) + cat mdx_files.txt | jq -R . | jq -s . > removed_files.json # Create final payload jq -n \ - --argjson changed "$changed_files" \ - --argjson removed "$removed_files" \ + --slurpfile changed changed_files.json \ + --slurpfile removed removed_files.json \ --arg repo "$GITHUB_REPOSITORY" \ '{ repo: $repo, - changed: $changed, - removed: $removed + changed: ($changed | .[0] // []), + removed: ($removed | .[0] // []) }' > payload.json + # Clean up temp files + rm -f changed_files.json removed_files.json temp_file.json + # Show debug info echo "Payload structure (without contents):" jq 'del(.changed[].content)' payload.json From 2ec0e46e7e3124c3caa1e5611f8c9e1b058c6d6f Mon Sep 17 00:00:00 2001 From: afterrburn Date: Wed, 18 Jun 2025 20:28:01 -0600 Subject: [PATCH 04/16] turn full sync back to workflow trigger --- .github/workflows/sync-docs-full.yml | 4 ++-- agent-docs/src/agents/doc-processing/docs-orchestrator.ts | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 4e1c04ec..622679b4 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -1,7 +1,7 @@ name: Full Docs Sync to Vector Store on: - push: + workflow_dispatch: jobs: sync: @@ -70,4 +70,4 @@ jobs: curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \ -X POST \ -H "Content-Type: application/json" \ - -d @payload.json \ No newline at end of file + -d @payload.json \ No newline at end of file diff --git a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts index 48a8d71b..f714a60b 100644 --- a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts +++ b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts @@ -76,7 +76,8 @@ export async function syncDocsFromPayload(ctx: AgentContext, payload: SyncPayloa ...chunk.metadata, path: logicalPath, }; - await ctx.vector.upsert(VECTOR_STORE_NAME, chunk); + const result = await ctx.vector.upsert(VECTOR_STORE_NAME, chunk); + ctx.logger.info('Upserted chunk: %o', result.length); } processed++; From 6d4b6859409d21aed33ec7478bf64056ec00f112 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 07:15:04 -0600 Subject: [PATCH 05/16] adding new agent --- agent-docs/agentuity.yaml | 3 +++ agent-docs/src/agents/doc-qa/index.ts | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 agent-docs/src/agents/doc-qa/index.ts diff --git a/agent-docs/agentuity.yaml b/agent-docs/agentuity.yaml index 6f46b42b..89d8f17b 100644 --- a/agent-docs/agentuity.yaml +++ b/agent-docs/agentuity.yaml @@ -75,3 +75,6 @@ agents: name: doc-processing # The description of the Agent which is editable description: An applicaiton that process documents + - id: agent_9ccc5545e93644bd9d7954e632a55a61 + name: doc-qa + description: Agent that can answer questions based on dev docs as the knowledge base diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts new file mode 100644 index 00000000..511a9e46 --- /dev/null +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -0,0 +1,9 @@ +import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk'; + +export default async function Agent( + req: AgentRequest, + resp: AgentResponse, + ctx: AgentContext +) { + return resp.text('Hello from Agentuity!'); +} From afec57f3e7352675d4932682bd0df98f0afb64d2 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 07:19:36 -0600 Subject: [PATCH 06/16] add secret --- .github/workflows/sync-docs-full.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 622679b4..f02c7a9a 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -60,6 +60,8 @@ jobs: jq 'del(.changed[].content)' payload.json - name: Send to Agentuity + env: + AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }} run: | echo "About to sync these files:" jq -r '.changed[].path' payload.json @@ -69,5 +71,6 @@ jobs: # Uncomment to actually send curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \ -X POST \ + -H "Authorization: Bearer $AGENTUITY_TOKEN" \ -H "Content-Type: application/json" \ -d @payload.json \ No newline at end of file From d856eee59ae47754239688b33e3f4dfb8edbcc86 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 08:43:52 -0600 Subject: [PATCH 07/16] move the brittle bash scripting magic to modular bash script --- .github/workflows/sync-docs-full.yml | 68 +++----------------- .github/workflows/sync-docs.yml | 58 +++-------------- bin/build-payload.sh | 95 ++++++++++++++++++++++++++++ bin/collect-all-files.sh | 27 ++++++++ bin/collect-changed-files.sh | 55 ++++++++++++++++ bin/send-webhook.sh | 77 ++++++++++++++++++++++ bin/validate-files.sh | 50 +++++++++++++++ 7 files changed, 322 insertions(+), 108 deletions(-) create mode 100755 bin/build-payload.sh create mode 100755 bin/collect-all-files.sh create mode 100755 bin/collect-changed-files.sh create mode 100755 bin/send-webhook.sh create mode 100755 bin/validate-files.sh diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index f02c7a9a..6536b6ae 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -9,68 +9,18 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Get all MDX files and prepare payload - id: files + - name: Collect and validate files run: | - # First find all MDX files recursively - echo "Finding all MDX files..." - find content -type f -name "*.mdx" | sed 's|^content/||' > mdx_files.txt - echo "Found files:" - cat mdx_files.txt + ./bin/collect-all-files.sh | \ + ./bin/validate-files.sh > all-files.txt - # Create JSON array of changed files - echo "Processing files..." - rm -f changed_files.json - echo "[]" > changed_files.json - - while IFS= read -r path; do - [ -z "$path" ] && continue - if [ -f "content/$path" ]; then - echo "Processing: content/$path" >&2 - # Create individual file JSON and append to array - jq -n \ - --arg path "$path" \ - --arg content "$(base64 -w0 < "content/$path")" \ - '{path: $path, content: $content}' > temp_file.json - # Append to the array using jq - jq '. + [input]' changed_files.json temp_file.json > temp_changed.json - mv temp_changed.json changed_files.json - fi - done < mdx_files.txt - - # Create removed files array (same as changed for full sync) - cat mdx_files.txt | jq -R . | jq -s . > removed_files.json - - # Create final payload - jq -n \ - --slurpfile changed changed_files.json \ - --slurpfile removed removed_files.json \ - --arg repo "$GITHUB_REPOSITORY" \ - '{ - repo: $repo, - changed: ($changed | .[0] // []), - removed: ($removed | .[0] // []) - }' > payload.json - - # Clean up temp files - rm -f changed_files.json removed_files.json temp_file.json - - # Show debug info - echo "Payload structure (without contents):" - jq 'del(.changed[].content)' payload.json + echo "Files to sync:" + cat all-files.txt - - name: Send to Agentuity + - name: Build and send payload env: AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }} run: | - echo "About to sync these files:" - jq -r '.changed[].path' payload.json - echo -e "\nWill first remove these paths:" - jq -r '.removed[]' payload.json - - # Uncomment to actually send - curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \ - -X POST \ - -H "Authorization: Bearer $AGENTUITY_TOKEN" \ - -H "Content-Type: application/json" \ - -d @payload.json \ No newline at end of file + cat all-files.txt | \ + ./bin/build-payload.sh "${{ github.repository }}" full | \ + ./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN" \ No newline at end of file diff --git a/.github/workflows/sync-docs.yml b/.github/workflows/sync-docs.yml index 09a9491a..05236799 100644 --- a/.github/workflows/sync-docs.yml +++ b/.github/workflows/sync-docs.yml @@ -13,59 +13,19 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Get changed and removed files - id: files + - name: Collect and validate files run: | git fetch origin ${{ github.event.before }} + ./bin/collect-changed-files.sh "${{ github.event.before }}" "${{ github.sha }}" | \ + ./bin/validate-files.sh > changed-files.txt - # Get changed files (relative to content directory) - CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||') - REMOVED_FILES=$(git diff --name-only --diff-filter=D ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||') - - echo "Changed files: $CHANGED_FILES" - echo "Removed files: $REMOVED_FILES" - - # Build JSON payload with file contents - payload=$(jq -n \ - --arg commit "${{ github.sha }}" \ - --arg repo "${{ github.repository }}" \ - --argjson changed "$( - if [ -n "$CHANGED_FILES" ]; then - for f in $CHANGED_FILES; do - if [ -f "content/$f" ]; then - jq -n \ - --arg path "$f" \ - --arg content "$(base64 -w0 < "content/$f")" \ - '{path: $path, content: $content}' - fi - done | jq -s '.' - else - echo '[]' - fi - )" \ - --argjson removed "$( - if [ -n "$REMOVED_FILES" ]; then - printf '%s\n' $REMOVED_FILES | jq -R -s -c 'split("\n") | map(select(length > 0))' - else - echo '[]' - fi - )" \ - '{commit: $commit, repo: $repo, changed: $changed, removed: $removed}' - ) - - echo "payload<> $GITHUB_OUTPUT - echo "$payload" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + echo "Files to sync:" + cat changed-files.txt - - name: Trigger Agentuity Sync Agent + - name: Build and send payload env: AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }} run: | - echo "Sending payload to agent:" - echo '${{ steps.files.outputs.payload }}' | jq '.' - - curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \ - -X POST \ - -H "Authorization: Bearer $AGENTUITY_TOKEN" \ - -H "Content-Type: application/json" \ - -d '${{ steps.files.outputs.payload }}' \ No newline at end of file + cat changed-files.txt | \ + ./bin/build-payload.sh "${{ github.repository }}" incremental | \ + ./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN" \ No newline at end of file diff --git a/bin/build-payload.sh b/bin/build-payload.sh new file mode 100755 index 00000000..23f8e021 --- /dev/null +++ b/bin/build-payload.sh @@ -0,0 +1,95 @@ +#!/bin/bash +set -euo pipefail + +# build-payload.sh [mode] +# Reads file paths from stdin, builds JSON payload +# mode: "incremental" (default) or "full" + +usage() { + echo "Usage: $0 [mode]" >&2 + echo "Example: $0 'owner/repo' incremental" >&2 + echo "Modes: incremental (default), full" >&2 + exit 1 +} + +if [ $# -lt 1 ]; then + usage +fi + +REPO_NAME="$1" +MODE="${2:-incremental}" + +echo "Building $MODE sync payload for $REPO_NAME" >&2 + +# Read all file paths into arrays +changed_files=() +removed_files=() + +while IFS= read -r file; do + if [ -z "$file" ]; then + continue + fi + + if [[ "$file" == REMOVED:* ]]; then + # Remove the REMOVED: prefix + removed_file="${file#REMOVED:}" + removed_files+=("$removed_file") + echo " removed: $removed_file" >&2 + else + changed_files+=("$file") + echo " changed: $file" >&2 + fi +done + +echo "Processing ${#changed_files[@]} changed files and ${#removed_files[@]} removed files" >&2 + +# For full mode, all files should be removed first +if [ "$MODE" = "full" ]; then + # Copy changed files to removed files for full refresh + removed_files=("${changed_files[@]}") + echo "Full mode: treating all files as removed for refresh" >&2 +fi + +# Start building JSON +echo "{" +echo " \"repo\": \"$REPO_NAME\"," + +# Build changed files array +echo " \"changed\": [" +first=true +for file in "${changed_files[@]}"; do + if [ -f "content/$file" ]; then + if [ "$first" = true ]; then + first=false + else + echo "," + fi + + # Read file content and base64 encode + content=$(base64 -w0 < "content/$file") + + echo -n " {" + echo -n "\"path\": \"$file\", " + echo -n "\"content\": \"$content\"" + echo -n "}" + fi +done +echo "" +echo " ]," + +# Build removed files array +echo " \"removed\": [" +first=true +for file in "${removed_files[@]}"; do + if [ "$first" = true ]; then + first=false + else + echo "," + fi + echo -n " \"$file\"" +done +echo "" +echo " ]" +echo "}" + +echo "Payload build complete" >&2 \ No newline at end of file diff --git a/bin/collect-all-files.sh b/bin/collect-all-files.sh new file mode 100755 index 00000000..33dcdb87 --- /dev/null +++ b/bin/collect-all-files.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -euo pipefail + +# collect-all-files.sh +# Outputs newline-separated list of all MDX files (relative to content/) + +echo "Collecting all MDX files for full sync" >&2 + +if [ ! -d "content" ]; then + echo "Error: content directory not found" >&2 + exit 1 +fi + +# Find all MDX files +find content -type f -name "*.mdx" | \ + sed 's|^content/||' | \ + sort | \ + while read -r file; do + if [ -n "$file" ] && [ -f "content/$file" ]; then + echo "$file" + echo " found: $file" >&2 + fi + done + +# Count and report +file_count=$(find content -type f -name "*.mdx" | wc -l) +echo "Total files found: $file_count" >&2 \ No newline at end of file diff --git a/bin/collect-changed-files.sh b/bin/collect-changed-files.sh new file mode 100755 index 00000000..73cd12f3 --- /dev/null +++ b/bin/collect-changed-files.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -euo pipefail + +# collect-changed-files.sh +# Outputs newline-separated list of changed MDX files (relative to content/) + +usage() { + echo "Usage: $0 " >&2 + echo "Example: $0 HEAD~1 HEAD" >&2 + exit 1 +} + +if [ $# -ne 2 ]; then + usage +fi + +BEFORE_COMMIT="$1" +AFTER_COMMIT="$2" + +# Validate commits exist +if ! git rev-parse --verify "$BEFORE_COMMIT" >/dev/null 2>&1; then + echo "Error: Invalid before commit: $BEFORE_COMMIT" >&2 + exit 1 +fi + +if ! git rev-parse --verify "$AFTER_COMMIT" >/dev/null 2>&1; then + echo "Error: Invalid after commit: $AFTER_COMMIT" >&2 + exit 1 +fi + +echo "Collecting changed files between $BEFORE_COMMIT and $AFTER_COMMIT" >&2 + +# Get changed files (excluding deleted) +echo "Changed files:" >&2 +git diff --name-only "$BEFORE_COMMIT" "$AFTER_COMMIT" -- 'content/**/*.mdx' | \ + grep '^content/' | \ + sed 's|^content/||' | \ + while read -r file; do + if [ -n "$file" ] && [ -f "content/$file" ]; then + echo "$file" + echo " + $file" >&2 + fi + done + +# Get removed files +echo "Removed files:" >&2 +git diff --name-only --diff-filter=D "$BEFORE_COMMIT" "$AFTER_COMMIT" -- 'content/**/*.mdx' | \ + grep '^content/' | \ + sed 's|^content/||' | \ + while read -r file; do + if [ -n "$file" ]; then + echo "REMOVED:$file" + echo " - $file" >&2 + fi + done \ No newline at end of file diff --git a/bin/send-webhook.sh b/bin/send-webhook.sh new file mode 100755 index 00000000..ceda5e9e --- /dev/null +++ b/bin/send-webhook.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -euo pipefail + +# send-webhook.sh [auth_token] +# Reads JSON payload from stdin, sends to webhook with retries + +usage() { + echo "Usage: $0 [auth_token]" >&2 + echo "Example: $0 'https://example.com/webhook' 'Bearer token123'" >&2 + exit 1 +} + +if [ $# -lt 1 ]; then + usage +fi + +WEBHOOK_URL="$1" +AUTH_TOKEN="${2:-}" +MAX_RETRIES=3 +RETRY_DELAY=2 + +echo "Sending webhook to $WEBHOOK_URL" >&2 + +# Read payload from stdin +payload=$(cat) + +if [ -z "$payload" ]; then + echo "Error: No payload received from stdin" >&2 + exit 1 +fi + +# Validate JSON +if ! echo "$payload" | jq . >/dev/null 2>&1; then + echo "Error: Invalid JSON payload" >&2 + exit 1 +fi + +echo "Payload size: $(echo "$payload" | wc -c) bytes" >&2 + +# Build curl command +curl_args=( + -X POST + -H "Content-Type: application/json" + -d "$payload" + --fail + --show-error + --silent +) + +# Add auth header if provided +if [ -n "$AUTH_TOKEN" ]; then + curl_args+=(-H "Authorization: $AUTH_TOKEN") +fi + +# Retry logic +for attempt in $(seq 1 $MAX_RETRIES); do + echo "Attempt $attempt/$MAX_RETRIES..." >&2 + + if response=$(curl "${curl_args[@]}" "$WEBHOOK_URL" 2>&1); then + echo "Success! Response:" >&2 + echo "$response" >&2 + echo "$response" + exit 0 + else + echo "Attempt $attempt failed: $response" >&2 + + if [ $attempt -lt $MAX_RETRIES ]; then + echo "Retrying in ${RETRY_DELAY}s..." >&2 + sleep $RETRY_DELAY + # Exponential backoff + RETRY_DELAY=$((RETRY_DELAY * 2)) + fi + fi +done + +echo "Error: All $MAX_RETRIES attempts failed" >&2 +exit 1 \ No newline at end of file diff --git a/bin/validate-files.sh b/bin/validate-files.sh new file mode 100755 index 00000000..e8998cf4 --- /dev/null +++ b/bin/validate-files.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -euo pipefail + +# validate-files.sh +# Reads file paths from stdin, validates they exist and are safe +# Outputs only valid file paths + +echo "Validating file paths" >&2 + +valid_count=0 +invalid_count=0 + +while IFS= read -r file; do + # Skip empty lines + if [ -z "$file" ]; then + continue + fi + + # Handle REMOVED: prefix + if [[ "$file" == REMOVED:* ]]; then + echo "$file" + ((valid_count++)) + continue + fi + + # Security check: prevent path traversal + if [[ "$file" == *".."* ]] || [[ "$file" == "/"* ]]; then + echo "Warning: Unsafe path detected, skipping: $file" >&2 + ((invalid_count++)) + continue + fi + + # Check if file exists + if [ -f "content/$file" ]; then + echo "$file" + echo " ✓ $file" >&2 + ((valid_count++)) + else + echo "Warning: File not found, skipping: $file" >&2 + ((invalid_count++)) + fi +done + +echo "Validation complete: $valid_count valid, $invalid_count invalid" >&2 + +# Exit with error if no valid files +if [ "$valid_count" -eq 0 ]; then + echo "Error: No valid files found" >&2 + exit 1 +fi \ No newline at end of file From bd0ec71ec2b262ce39224411aae5ecabbad24d71 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 08:44:16 -0600 Subject: [PATCH 08/16] test push --- .github/workflows/sync-docs-full.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 6536b6ae..6223416f 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -1,7 +1,7 @@ name: Full Docs Sync to Vector Store on: - workflow_dispatch: + push: jobs: sync: From 3cbb4c9b731332f8dca9e228f2b880297022353a Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 09:03:27 -0600 Subject: [PATCH 09/16] fix broken pipe --- bin/validate-files.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/validate-files.sh b/bin/validate-files.sh index e8998cf4..6752d38d 100755 --- a/bin/validate-files.sh +++ b/bin/validate-files.sh @@ -10,7 +10,10 @@ echo "Validating file paths" >&2 valid_count=0 invalid_count=0 -while IFS= read -r file; do +# Read all input into an array first +mapfile -t files + +for file in "${files[@]}"; do # Skip empty lines if [ -z "$file" ]; then continue From ff284c1d6b15a248379c370d5837282afc1368b6 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 19:46:08 -0600 Subject: [PATCH 10/16] undo debug run --- .github/workflows/sync-docs-full.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 6223416f..6536b6ae 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -1,7 +1,7 @@ name: Full Docs Sync to Vector Store on: - push: + workflow_dispatch: jobs: sync: From 8928aa3df7bb731a70bfcafbd2c5cdc69a04803e Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 19:50:38 -0600 Subject: [PATCH 11/16] apply coderabbit suggestions --- .github/workflows/sync-docs-full.yml | 2 ++ .github/workflows/sync-docs.yml | 4 ++++ bin/collect-all-files.sh | 1 + 3 files changed, 7 insertions(+) diff --git a/.github/workflows/sync-docs-full.yml b/.github/workflows/sync-docs-full.yml index 6536b6ae..59f719d8 100644 --- a/.github/workflows/sync-docs-full.yml +++ b/.github/workflows/sync-docs-full.yml @@ -11,6 +11,7 @@ jobs: - name: Collect and validate files run: | + set -euo pipefail ./bin/collect-all-files.sh | \ ./bin/validate-files.sh > all-files.txt @@ -21,6 +22,7 @@ jobs: env: AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }} run: | + set -euo pipefail cat all-files.txt | \ ./bin/build-payload.sh "${{ github.repository }}" full | \ ./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN" \ No newline at end of file diff --git a/.github/workflows/sync-docs.yml b/.github/workflows/sync-docs.yml index 05236799..9e28e839 100644 --- a/.github/workflows/sync-docs.yml +++ b/.github/workflows/sync-docs.yml @@ -12,9 +12,12 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Collect and validate files run: | + set -euo pipefail git fetch origin ${{ github.event.before }} ./bin/collect-changed-files.sh "${{ github.event.before }}" "${{ github.sha }}" | \ ./bin/validate-files.sh > changed-files.txt @@ -26,6 +29,7 @@ jobs: env: AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }} run: | + set -euo pipefail cat changed-files.txt | \ ./bin/build-payload.sh "${{ github.repository }}" incremental | \ ./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN" \ No newline at end of file diff --git a/bin/collect-all-files.sh b/bin/collect-all-files.sh index 33dcdb87..8353acdd 100755 --- a/bin/collect-all-files.sh +++ b/bin/collect-all-files.sh @@ -1,5 +1,6 @@ #!/bin/bash set -euo pipefail +trap "" PIPE # collect-all-files.sh # Outputs newline-separated list of all MDX files (relative to content/) From 4dccc270666f6a69ad680dfb53b31daa329d90f9 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Thu, 19 Jun 2025 21:14:06 -0600 Subject: [PATCH 12/16] add a simple QA agent answering based on vector search result --- .../doc-processing/docs-orchestrator.ts | 2 +- .../agents/doc-processing/docs-processor.ts | 10 +- .../src/agents/doc-processing/embed-chunks.ts | 2 +- agent-docs/src/agents/doc-processing/types.ts | 13 ++- agent-docs/src/agents/doc-qa/config.ts | 2 + agent-docs/src/agents/doc-qa/index.ts | 109 +++++++++++++++++- agent-docs/src/agents/doc-qa/types.ts | 5 + 7 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 agent-docs/src/agents/doc-qa/config.ts create mode 100644 agent-docs/src/agents/doc-qa/types.ts diff --git a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts index f714a60b..fbdbd6dd 100644 --- a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts +++ b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts @@ -1,7 +1,7 @@ import type { AgentContext } from '@agentuity/sdk'; import { processDoc } from './docs-processor'; import { VECTOR_STORE_NAME } from './config'; -import type { FilePayload, SyncPayload, SyncStats } from './types'; +import type { SyncPayload, SyncStats } from './types'; /** * Helper to remove all vectors for a given logical path from the vector store. diff --git a/agent-docs/src/agents/doc-processing/docs-processor.ts b/agent-docs/src/agents/doc-processing/docs-processor.ts index d21b136f..c568d26f 100644 --- a/agent-docs/src/agents/doc-processing/docs-processor.ts +++ b/agent-docs/src/agents/doc-processing/docs-processor.ts @@ -3,16 +3,8 @@ import type { VectorUpsertParams } from '@agentuity/sdk'; import { chunkAndEnrichDoc } from './chunk-mdx'; import { embedChunks } from './embed-chunks'; import type { Chunk } from './chunk-mdx'; +import type { ChunkMetadata } from './types'; -export type ChunkMetadata = { - chunkIndex: number; - contentType: string; - heading: string; - title: string; - description: string; - text: string; - createdAt: string; -}; /** * Processes a single .mdx doc: loads, chunks, and enriches each chunk with metadata. diff --git a/agent-docs/src/agents/doc-processing/embed-chunks.ts b/agent-docs/src/agents/doc-processing/embed-chunks.ts index 6f2c97f5..0d508388 100644 --- a/agent-docs/src/agents/doc-processing/embed-chunks.ts +++ b/agent-docs/src/agents/doc-processing/embed-chunks.ts @@ -29,6 +29,6 @@ export async function embedChunks( if (!response.embeddings || response.embeddings.length !== texts.length) { throw new Error('Embedding API returned unexpected result.'); } - } + return response.embeddings; } \ No newline at end of file diff --git a/agent-docs/src/agents/doc-processing/types.ts b/agent-docs/src/agents/doc-processing/types.ts index 3a0815aa..bba55c9e 100644 --- a/agent-docs/src/agents/doc-processing/types.ts +++ b/agent-docs/src/agents/doc-processing/types.ts @@ -15,4 +15,15 @@ export interface SyncStats { deleted: number; errors: number; errorFiles: string[]; -} \ No newline at end of file +} + +export type ChunkMetadata = { + chunkIndex: number; + contentType: string; + heading: string; + title: string; + description: string; + text: string; + createdAt: string; + path?: string; +}; diff --git a/agent-docs/src/agents/doc-qa/config.ts b/agent-docs/src/agents/doc-qa/config.ts new file mode 100644 index 00000000..c72ec5b8 --- /dev/null +++ b/agent-docs/src/agents/doc-qa/config.ts @@ -0,0 +1,2 @@ +export const VECTOR_STORE_NAME = process.env.VECTOR_STORE_NAME || 'docs'; +export const vectorSearchNumber = 20; \ No newline at end of file diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts index 511a9e46..fded198b 100644 --- a/agent-docs/src/agents/doc-qa/index.ts +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -1,9 +1,116 @@ import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk'; +import { streamText } from 'ai'; +import { openai } from '@ai-sdk/openai'; + +import type { ChunkMetadata } from '../doc-processing/types'; +import { VECTOR_STORE_NAME, vectorSearchNumber } from './config'; +import type { RelevantDoc } from './types'; export default async function Agent( req: AgentRequest, resp: AgentResponse, ctx: AgentContext ) { - return resp.text('Hello from Agentuity!'); + const prompt = await req.data.text(); + const relevantDocs = await retrieveRelevantDocs(ctx, prompt); + + const systemPrompt = ` +You are a developer documentation assistant. Your job is to answer user questions about the Agentuity platform as effectively and concisely as possible, adapting your style to the user's request. If the user asks for a direct answer, provide it without extra explanation. If they want an explanation, provide a clear and concise one. Use only the provided relevant documents to answer. + +For every answer, return a valid JSON object with: + 1. "answer": your answer to the user's question. + 2. "documents": an array of strings, representing the path of the documents you used to answer. + +If you use information from a document, include it in the "documents" array. If you do not use any documents, return an empty array for "documents". + +User question: +\`\`\` +${prompt} +\`\`\` + +Relevant documents: +${JSON.stringify(relevantDocs, null, 2)} + +Respond ONLY with a valid JSON object as described above. In your answer, you should format code blocks properly in Markdown style if the user needs answer in code block. +`.trim(); + + const llmResponse = await streamText({ + model: openai('gpt-4o'), + system: systemPrompt, + prompt: prompt, + maxTokens: 2048, + }); + + let answer = ''; + for await (const delta of llmResponse.textStream) { + answer += delta; + } + + return resp.text(answer); } + +async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise { + const dbQuery = { + query: prompt, + limit: vectorSearchNumber + } + var vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); + + const uniquePaths = new Set(); + + vectors.forEach(vec => { + if (!vec.metadata) { + ctx.logger.warn('Vector missing metadata'); + return; + } + const path = typeof vec.metadata.path === 'string' ? vec.metadata.path : undefined; + if (!path) { + ctx.logger.warn('Vector metadata path is not a string'); + return; + } + if (!path) { + ctx.logger.warn('Vector metadata missing path'); + return; + } + uniquePaths.add(path); + }); + + const docs = await Promise.all( + Array.from(uniquePaths).map(async path => ({ + path, + content: await retrieveDocumentBasedOnPath(ctx, path) + })) + ); + + return docs; +} + +async function retrieveDocumentBasedOnPath(ctx: AgentContext, path: string): Promise { + const dbQuery = { + query: ' ', + limit: 10000, + metadata: { + path: path + } + } + try { + const vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); + + // Sort vectors by chunk index and concatenate text + const sortedVectors = vectors + .map(vec => ({ + metadata: vec.metadata as ChunkMetadata, + index: (vec.metadata as ChunkMetadata).chunkIndex + })) + .sort((a, b) => a.index - b.index); + + const fullText = sortedVectors + .map(vec => vec.metadata.text) + .join('\n\n'); + + return fullText; + } catch (err) { + ctx.logger.error('Error retrieving document by path %s: %o', path, err); + return ''; + } +} \ No newline at end of file diff --git a/agent-docs/src/agents/doc-qa/types.ts b/agent-docs/src/agents/doc-qa/types.ts new file mode 100644 index 00000000..9fa227ff --- /dev/null +++ b/agent-docs/src/agents/doc-qa/types.ts @@ -0,0 +1,5 @@ +export interface RelevantDoc { + path: string; + content: string; + } + \ No newline at end of file From fbbbd646600d746320f62462325e44fb6974c144 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Fri, 20 Jun 2025 07:48:36 -0600 Subject: [PATCH 13/16] return streaming result --- agent-docs/src/agents/doc-qa/index.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts index fded198b..4b86609c 100644 --- a/agent-docs/src/agents/doc-qa/index.ts +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -41,12 +41,7 @@ Respond ONLY with a valid JSON object as described above. In your answer, you sh maxTokens: 2048, }); - let answer = ''; - for await (const delta of llmResponse.textStream) { - answer += delta; - } - - return resp.text(answer); + return resp.stream(llmResponse.textStream); } async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise { From 5efc8540cd36e0470c40e4180a8c48f65beeec42 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Fri, 20 Jun 2025 07:51:02 -0600 Subject: [PATCH 14/16] move config to unified root file --- agent-docs/src/agents/doc-processing/config.ts | 1 - agent-docs/src/agents/doc-processing/docs-orchestrator.ts | 2 +- agent-docs/src/agents/doc-processing/index.ts | 2 +- agent-docs/src/agents/doc-qa/index.ts | 2 +- agent-docs/src/agents/doc-qa/config.ts => config.ts | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 agent-docs/src/agents/doc-processing/config.ts rename agent-docs/src/agents/doc-qa/config.ts => config.ts (90%) diff --git a/agent-docs/src/agents/doc-processing/config.ts b/agent-docs/src/agents/doc-processing/config.ts deleted file mode 100644 index 214f156c..00000000 --- a/agent-docs/src/agents/doc-processing/config.ts +++ /dev/null @@ -1 +0,0 @@ -export const VECTOR_STORE_NAME = process.env.VECTOR_STORE_NAME || 'docs'; \ No newline at end of file diff --git a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts index fbdbd6dd..3cdefbb7 100644 --- a/agent-docs/src/agents/doc-processing/docs-orchestrator.ts +++ b/agent-docs/src/agents/doc-processing/docs-orchestrator.ts @@ -1,6 +1,6 @@ import type { AgentContext } from '@agentuity/sdk'; import { processDoc } from './docs-processor'; -import { VECTOR_STORE_NAME } from './config'; +import { VECTOR_STORE_NAME } from '../../../../config'; import type { SyncPayload, SyncStats } from './types'; /** diff --git a/agent-docs/src/agents/doc-processing/index.ts b/agent-docs/src/agents/doc-processing/index.ts index 6bb74899..629e0773 100644 --- a/agent-docs/src/agents/doc-processing/index.ts +++ b/agent-docs/src/agents/doc-processing/index.ts @@ -1,6 +1,6 @@ import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk'; import { syncDocsFromPayload } from './docs-orchestrator'; -import type { FilePayload, SyncPayload } from './types'; +import type { SyncPayload } from './types'; export const welcome = () => { return { diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts index 4b86609c..acb29910 100644 --- a/agent-docs/src/agents/doc-qa/index.ts +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -3,7 +3,7 @@ import { streamText } from 'ai'; import { openai } from '@ai-sdk/openai'; import type { ChunkMetadata } from '../doc-processing/types'; -import { VECTOR_STORE_NAME, vectorSearchNumber } from './config'; +import { VECTOR_STORE_NAME, vectorSearchNumber } from '../../../../config'; import type { RelevantDoc } from './types'; export default async function Agent( diff --git a/agent-docs/src/agents/doc-qa/config.ts b/config.ts similarity index 90% rename from agent-docs/src/agents/doc-qa/config.ts rename to config.ts index c72ec5b8..3088c1f8 100644 --- a/agent-docs/src/agents/doc-qa/config.ts +++ b/config.ts @@ -1,2 +1,2 @@ -export const VECTOR_STORE_NAME = process.env.VECTOR_STORE_NAME || 'docs'; +export const VECTOR_STORE_NAME = process.env.VECTOR_STORE_NAME || 'docs'; export const vectorSearchNumber = 20; \ No newline at end of file From 7d07da8b29f0c27fc4a439e365798356be900c72 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Fri, 20 Jun 2025 08:00:02 -0600 Subject: [PATCH 15/16] enhance prompt --- agent-docs/src/agents/doc-qa/index.ts | 60 +++++++++++++++------------ 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts index acb29910..11519270 100644 --- a/agent-docs/src/agents/doc-qa/index.ts +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -17,6 +17,11 @@ export default async function Agent( const systemPrompt = ` You are a developer documentation assistant. Your job is to answer user questions about the Agentuity platform as effectively and concisely as possible, adapting your style to the user's request. If the user asks for a direct answer, provide it without extra explanation. If they want an explanation, provide a clear and concise one. Use only the provided relevant documents to answer. +You must not make up answers if the provided documents don't exist. You can be direct to the user that the documentations +don't seem to include what they are looking for. Lying to the user is prohibited as it only slows them down. Feel free to +suggest follow up questions if what they're asking for don't seem to have an answer in the document. You can provide them +a few related things that the documents contain that may interest them. + For every answer, return a valid JSON object with: 1. "answer": your answer to the user's question. 2. "documents": an array of strings, representing the path of the documents you used to answer. @@ -33,7 +38,7 @@ ${JSON.stringify(relevantDocs, null, 2)} Respond ONLY with a valid JSON object as described above. In your answer, you should format code blocks properly in Markdown style if the user needs answer in code block. `.trim(); - + const llmResponse = await streamText({ model: openai('gpt-4o'), system: systemPrompt, @@ -49,35 +54,38 @@ async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise< query: prompt, limit: vectorSearchNumber } - var vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); + try { - const uniquePaths = new Set(); - vectors.forEach(vec => { - if (!vec.metadata) { - ctx.logger.warn('Vector missing metadata'); - return; - } - const path = typeof vec.metadata.path === 'string' ? vec.metadata.path : undefined; - if (!path) { - ctx.logger.warn('Vector metadata path is not a string'); - return; - } - if (!path) { - ctx.logger.warn('Vector metadata missing path'); - return; - } - uniquePaths.add(path); - }); + var vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); + + const uniquePaths = new Set(); + + vectors.forEach(vec => { + if (!vec.metadata) { + ctx.logger.warn('Vector missing metadata'); + return; + } + const path = typeof vec.metadata.path === 'string' ? vec.metadata.path : undefined; + if (!path) { + ctx.logger.warn('Vector metadata path is not a string'); + return; + } + uniquePaths.add(path); + }); - const docs = await Promise.all( - Array.from(uniquePaths).map(async path => ({ - path, - content: await retrieveDocumentBasedOnPath(ctx, path) - })) - ); + const docs = await Promise.all( + Array.from(uniquePaths).map(async path => ({ + path, + content: await retrieveDocumentBasedOnPath(ctx, path) + })) + ); - return docs; + return docs; + } catch (err) { + ctx.logger.error('Error retrieving relevant docs: %o', err); + return []; + } } async function retrieveDocumentBasedOnPath(ctx: AgentContext, path: string): Promise { From f1d2d2851d11fe642b4f309507d85dd7be8271f1 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Fri, 20 Jun 2025 08:09:29 -0600 Subject: [PATCH 16/16] clean up redundancy --- agent-docs/src/agents/doc-qa/index.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/agent-docs/src/agents/doc-qa/index.ts b/agent-docs/src/agents/doc-qa/index.ts index 11519270..54351f89 100644 --- a/agent-docs/src/agents/doc-qa/index.ts +++ b/agent-docs/src/agents/doc-qa/index.ts @@ -57,7 +57,7 @@ async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise< try { - var vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); + const vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery); const uniquePaths = new Set(); @@ -101,10 +101,13 @@ async function retrieveDocumentBasedOnPath(ctx: AgentContext, path: string): Pro // Sort vectors by chunk index and concatenate text const sortedVectors = vectors - .map(vec => ({ - metadata: vec.metadata as ChunkMetadata, - index: (vec.metadata as ChunkMetadata).chunkIndex - })) + .map(vec => { + const metadata = vec.metadata as ChunkMetadata; + return { + metadata, + index: metadata.chunkIndex + }; + }) .sort((a, b) => a.index - b.index); const fullText = sortedVectors