Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 13 additions & 43 deletions .github/workflows/sync-docs-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,20 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Get all MDX files and prepare payload
id: files
- name: Collect and validate files
run: |
# First find all MDX files recursively
echo "Finding all MDX files..."
find content -type f -name "*.mdx" | sed 's|^content/||' > mdx_files.txt
echo "Found files:"
cat mdx_files.txt
set -euo pipefail
./bin/collect-all-files.sh | \
./bin/validate-files.sh > all-files.txt

# Create the changed array by processing each file through jq
echo "Processing files..."
jq -n --slurpfile paths <(
while IFS= read -r path; do
[ -z "$path" ] && continue
if [ -f "content/$path" ]; then
echo "Processing: content/$path"
jq -n \
--arg path "$path" \
--arg content "$(base64 -w0 < "content/$path")" \
'{path: $path, content: $content}'
fi
done < mdx_files.txt | jq -s '.'
) \
--slurpfile removed <(cat mdx_files.txt | jq -R . | jq -s .) \
--arg repo "$GITHUB_REPOSITORY" \
'{
repo: $repo,
changed: ($paths | .[0] // []),
removed: ($removed | .[0] // [])
}' > payload.json

# Show debug info
echo "Payload structure (without contents):"
jq 'del(.changed[].content)' payload.json
echo "Files to sync:"
cat all-files.txt

- name: Send to Agentuity
- name: Build and send payload
env:
AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }}
run: |
echo "About to sync these files:"
jq -r '.changed[].path' payload.json
echo -e "\nWill first remove these paths:"
jq -r '.removed[]' payload.json

# Uncomment to actually send
curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \
-X POST \
-H "Content-Type: application/json" \
-d @payload.json
set -euo pipefail
cat all-files.txt | \
./bin/build-payload.sh "${{ github.repository }}" full | \
./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN"
62 changes: 13 additions & 49 deletions .github/workflows/sync-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,24 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get changed and removed files
id: files
- name: Collect and validate files
run: |
set -euo pipefail
git fetch origin ${{ github.event.before }}
./bin/collect-changed-files.sh "${{ github.event.before }}" "${{ github.sha }}" | \
./bin/validate-files.sh > changed-files.txt

# Get changed files (relative to content directory)
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||')
REMOVED_FILES=$(git diff --name-only --diff-filter=D ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||')

echo "Changed files: $CHANGED_FILES"
echo "Removed files: $REMOVED_FILES"

# Build JSON payload with file contents
payload=$(jq -n \
--arg commit "${{ github.sha }}" \
--arg repo "${{ github.repository }}" \
--argjson changed "$(
if [ -n "$CHANGED_FILES" ]; then
for f in $CHANGED_FILES; do
if [ -f "content/$f" ]; then
jq -n \
--arg path "$f" \
--arg content "$(base64 -w0 < "content/$f")" \
'{path: $path, content: $content}'
fi
done | jq -s '.'
else
echo '[]'
fi
)" \
--argjson removed "$(
if [ -n "$REMOVED_FILES" ]; then
printf '%s\n' $REMOVED_FILES | jq -R -s -c 'split("\n") | map(select(length > 0))'
else
echo '[]'
fi
)" \
'{commit: $commit, repo: $repo, changed: $changed, removed: $removed}'
)

echo "payload<<EOF" >> $GITHUB_OUTPUT
echo "$payload" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "Files to sync:"
cat changed-files.txt

- name: Trigger Agentuity Sync Agent
- name: Build and send payload
env:
AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }}
run: |
echo "Sending payload to agent:"
echo '${{ steps.files.outputs.payload }}' | jq '.'

curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \
-X POST \
-H "Authorization: Bearer $AGENTUITY_TOKEN" \
-H "Content-Type: application/json" \
-d '${{ steps.files.outputs.payload }}'
set -euo pipefail
cat changed-files.txt | \
./bin/build-payload.sh "${{ github.repository }}" incremental | \
./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Secure webhook URL and add newline at EOF.
Replace the hard-coded endpoint with a GitHub secret and append a newline:

-            ./bin/send-webhook.sh "https://agentuity.ai/webhook/…"
+            ./bin/send-webhook.sh "${{ secrets.SYNC_DOCS_WEBHOOK_URL }}"

+# (ensure newline at end of file)

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 YAMLlint (1.37.1)

[error] 35-35: no new line character at the end of file

(new-line-at-end-of-file)

🤖 Prompt for AI Agents
In .github/workflows/sync-docs.yml at line 35, the webhook URL is hard-coded,
which is insecure. Replace the URL string with a reference to a GitHub secret
(e.g., ${{ secrets.WEBHOOK_URL }}) to secure the endpoint. Also, ensure the file
ends with a newline character to comply with POSIX standards.

3 changes: 3 additions & 0 deletions agent-docs/agentuity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ agents:
name: doc-processing
# The description of the Agent which is editable
description: An application that processes documents
- id: agent_9ccc5545e93644bd9d7954e632a55a61
name: doc-qa
description: Agent that can answer questions based on dev docs as the knowledge base
7 changes: 4 additions & 3 deletions agent-docs/src/agents/doc-processing/docs-orchestrator.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { AgentContext } from '@agentuity/sdk';
import { processDoc } from './docs-processor';
import { VECTOR_STORE_NAME } from './config';
import type { FilePayload, SyncPayload, SyncStats } from './types';
import { VECTOR_STORE_NAME } from '../../../../config';
import type { SyncPayload, SyncStats } from './types';

/**
* Helper to remove all vectors for a given logical path from the vector store.
Expand Down Expand Up @@ -76,7 +76,8 @@ export async function syncDocsFromPayload(ctx: AgentContext, payload: SyncPayloa
...chunk.metadata,
path: logicalPath,
};
await ctx.vector.upsert(VECTOR_STORE_NAME, chunk);
const result = await ctx.vector.upsert(VECTOR_STORE_NAME, chunk);
ctx.logger.info('Upserted chunk: %o', result.length);
}

processed++;
Expand Down
10 changes: 1 addition & 9 deletions agent-docs/src/agents/doc-processing/docs-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,8 @@ import type { VectorUpsertParams } from '@agentuity/sdk';
import { chunkAndEnrichDoc } from './chunk-mdx';
import { embedChunks } from './embed-chunks';
import type { Chunk } from './chunk-mdx';
import type { ChunkMetadata } from './types';

export type ChunkMetadata = {
chunkIndex: number;
contentType: string;
heading: string;
title: string;
description: string;
text: string;
createdAt: string;
};

/**
* Processes a single .mdx doc: loads, chunks, and enriches each chunk with metadata.
Expand Down
2 changes: 1 addition & 1 deletion agent-docs/src/agents/doc-processing/embed-chunks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ export async function embedChunks(
if (!response.embeddings || response.embeddings.length !== texts.length) {
throw new Error('Embedding API returned unexpected result.');
}
}

return response.embeddings;
}
2 changes: 1 addition & 1 deletion agent-docs/src/agents/doc-processing/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk';
import { syncDocsFromPayload } from './docs-orchestrator';
import type { FilePayload, SyncPayload } from './types';
import type { SyncPayload } from './types';

export const welcome = () => {
return {
Expand Down
13 changes: 12 additions & 1 deletion agent-docs/src/agents/doc-processing/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,15 @@ export interface SyncStats {
deleted: number;
errors: number;
errorFiles: string[];
}
}

/**
 * Metadata stored alongside each embedded chunk in the vector store.
 * Written at upsert time by the doc-processing agent and read back by
 * the doc-qa agent when reconstructing documents.
 */
export type ChunkMetadata = {
// Position of the chunk within its source document; used to re-order
// chunks when reassembling the full text.
chunkIndex: number;
// Kind of content in the chunk (e.g. prose vs. code) — assumed from the
// name; TODO confirm against chunk-mdx producer.
contentType: string;
// Nearest heading the chunk falls under.
heading: string;
// Title of the source document.
title: string;
// Description of the source document.
description: string;
// Raw chunk text; concatenated with sibling chunks to rebuild the doc.
text: string;
// Timestamp when the chunk was created/indexed.
createdAt: string;
// Logical document path; optional because it is attached during upsert.
path?: string;
};
122 changes: 122 additions & 0 deletions agent-docs/src/agents/doc-qa/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk';
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';

import type { ChunkMetadata } from '../doc-processing/types';
import { VECTOR_STORE_NAME, vectorSearchNumber } from '../../../../config';
import type { RelevantDoc } from './types';

export default async function Agent(
req: AgentRequest,
resp: AgentResponse,
ctx: AgentContext
) {
const prompt = await req.data.text();
const relevantDocs = await retrieveRelevantDocs(ctx, prompt);

const systemPrompt = `
You are a developer documentation assistant. Your job is to answer user questions about the Agentuity platform as effectively and concisely as possible, adapting your style to the user's request. If the user asks for a direct answer, provide it without extra explanation. If they want an explanation, provide a clear and concise one. Use only the provided relevant documents to answer.
You must not make up answers if the provided documents don't exist. You can be direct to the user that the documentations
don't seem to include what they are looking for. Lying to the user is prohibited as it only slows them down. Feel free to
suggest follow up questions if what they're asking for don't seem to have an answer in the document. You can provide them
a few related things that the documents contain that may interest them.
For every answer, return a valid JSON object with:
1. "answer": your answer to the user's question.
2. "documents": an array of strings, representing the path of the documents you used to answer.
If you use information from a document, include it in the "documents" array. If you do not use any documents, return an empty array for "documents".
User question:
\`\`\`
${prompt}
\`\`\`
Relevant documents:
${JSON.stringify(relevantDocs, null, 2)}
Respond ONLY with a valid JSON object as described above. In your answer, you should format code blocks properly in Markdown style if the user needs answer in code block.
`.trim();

const llmResponse = await streamText({
model: openai('gpt-4o'),
system: systemPrompt,
prompt: prompt,
maxTokens: 2048,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This model's size is 128,000 tokens - why so low here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah I'll extend the size in the next PR. Mainly, I wanted to keep the generation concise and short for now. Will push its limit.

});

return resp.stream(llmResponse.textStream);
}

/**
 * Finds documents relevant to a user prompt via vector search.
 *
 * Searches the vector store with the raw prompt, de-duplicates the logical
 * document paths found in the result metadata, then loads the full text of
 * each unique document in parallel. Returns an empty array on any failure.
 *
 * @param ctx    Agent context providing the vector store and logger.
 * @param prompt Free-form user question used as the search query.
 * @returns One { path, content } entry per unique matching document.
 */
async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise<RelevantDoc[]> {
  const searchParams = {
    query: prompt,
    limit: vectorSearchNumber
  }
  try {
    const matches = await ctx.vector.search(VECTOR_STORE_NAME, searchParams);

    // Collect each distinct logical path, warning on malformed metadata.
    const seenPaths = new Set<string>();
    for (const match of matches) {
      if (!match.metadata) {
        ctx.logger.warn('Vector missing metadata');
        continue;
      }
      const docPath = typeof match.metadata.path === 'string' ? match.metadata.path : undefined;
      if (!docPath) {
        ctx.logger.warn('Vector metadata path is not a string');
        continue;
      }
      seenPaths.add(docPath);
    }

    // Fetch the full text of every unique document concurrently.
    return await Promise.all(
      [...seenPaths].map(async docPath => ({
        path: docPath,
        content: await retrieveDocumentBasedOnPath(ctx, docPath)
      }))
    );
  } catch (err) {
    ctx.logger.error('Error retrieving relevant docs: %o', err);
    return [];
  }
}

/**
 * Reconstructs the full text of a document from its stored chunks.
 *
 * Queries the vector store for chunks whose metadata.path matches the given
 * logical path, orders them by chunkIndex, and joins their text with blank
 * lines. Returns an empty string if the search itself fails.
 *
 * NOTE(review): the single-space query with a high limit is presumably a
 * metadata-filtered "fetch everything" against the store — confirm the
 * Agentuity vector API supports this pattern.
 *
 * @param ctx  Agent context providing the vector store and logger.
 * @param path Logical document path stored in chunk metadata.
 * @returns The concatenated document text, or '' on search error.
 */
async function retrieveDocumentBasedOnPath(ctx: AgentContext, path: string): Promise<string> {
  const dbQuery = {
    query: ' ',
    limit: 10000,
    metadata: {
      path: path
    }
  }
  try {
    const vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery);

    // Skip malformed chunks (missing metadata or a non-numeric chunkIndex)
    // instead of letting a TypeError on `metadata.chunkIndex` bounce to the
    // catch and discard the whole document; this mirrors the per-vector
    // metadata validation done in retrieveRelevantDocs.
    const orderedChunks = vectors
      .flatMap(vec => {
        const metadata = vec.metadata as ChunkMetadata | undefined;
        if (!metadata || typeof metadata.chunkIndex !== 'number') {
          ctx.logger.warn('Skipping chunk with invalid metadata for path %s', path);
          return [];
        }
        return [{ index: metadata.chunkIndex, text: metadata.text }];
      })
      .sort((a, b) => a.index - b.index);

    return orderedChunks.map(chunk => chunk.text).join('\n\n');
  } catch (err) {
    ctx.logger.error('Error retrieving document by path %s: %o', path, err);
    return '';
  }
}
5 changes: 5 additions & 0 deletions agent-docs/src/agents/doc-qa/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * A document surfaced by vector search and handed to the LLM as context.
 */
export interface RelevantDoc {
// Logical path of the document (taken from chunk metadata).
path: string;
// Full reconstructed text of the document.
content: string;
}

Loading