Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 13 additions & 43 deletions .github/workflows/sync-docs-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,50 +9,20 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Get all MDX files and prepare payload
id: files
- name: Collect and validate files
run: |
# First find all MDX files recursively
echo "Finding all MDX files..."
find content -type f -name "*.mdx" | sed 's|^content/||' > mdx_files.txt
echo "Found files:"
cat mdx_files.txt
set -euo pipefail
./bin/collect-all-files.sh | \
./bin/validate-files.sh > all-files.txt

# Create the changed array by processing each file through jq
echo "Processing files..."
jq -n --slurpfile paths <(
while IFS= read -r path; do
[ -z "$path" ] && continue
if [ -f "content/$path" ]; then
echo "Processing: content/$path"
jq -n \
--arg path "$path" \
--arg content "$(base64 -w0 < "content/$path")" \
'{path: $path, content: $content}'
fi
done < mdx_files.txt | jq -s '.'
) \
--slurpfile removed <(cat mdx_files.txt | jq -R . | jq -s .) \
--arg repo "$GITHUB_REPOSITORY" \
'{
repo: $repo,
changed: ($paths | .[0] // []),
removed: ($removed | .[0] // [])
}' > payload.json

# Show debug info
echo "Payload structure (without contents):"
jq 'del(.changed[].content)' payload.json
echo "Files to sync:"
cat all-files.txt

- name: Send to Agentuity
- name: Build and send payload
env:
AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }}
run: |
echo "About to sync these files:"
jq -r '.changed[].path' payload.json
echo -e "\nWill first remove these paths:"
jq -r '.removed[]' payload.json

# Uncomment to actually send
curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \
-X POST \
-H "Content-Type: application/json" \
-d @payload.json
set -euo pipefail
cat all-files.txt | \
./bin/build-payload.sh "${{ github.repository }}" full | \
./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN"
62 changes: 13 additions & 49 deletions .github/workflows/sync-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,24 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get changed and removed files
id: files
- name: Collect and validate files
run: |
set -euo pipefail
git fetch origin ${{ github.event.before }}
./bin/collect-changed-files.sh "${{ github.event.before }}" "${{ github.sha }}" | \
./bin/validate-files.sh > changed-files.txt

# Get changed files (relative to content directory)
CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||')
REMOVED_FILES=$(git diff --name-only --diff-filter=D ${{ github.event.before }} ${{ github.sha }} -- 'content/**/*.mdx' | sed 's|^content/||')

echo "Changed files: $CHANGED_FILES"
echo "Removed files: $REMOVED_FILES"

# Build JSON payload with file contents
payload=$(jq -n \
--arg commit "${{ github.sha }}" \
--arg repo "${{ github.repository }}" \
--argjson changed "$(
if [ -n "$CHANGED_FILES" ]; then
for f in $CHANGED_FILES; do
if [ -f "content/$f" ]; then
jq -n \
--arg path "$f" \
--arg content "$(base64 -w0 < "content/$f")" \
'{path: $path, content: $content}'
fi
done | jq -s '.'
else
echo '[]'
fi
)" \
--argjson removed "$(
if [ -n "$REMOVED_FILES" ]; then
printf '%s\n' $REMOVED_FILES | jq -R -s -c 'split("\n") | map(select(length > 0))'
else
echo '[]'
fi
)" \
'{commit: $commit, repo: $repo, changed: $changed, removed: $removed}'
)

echo "payload<<EOF" >> $GITHUB_OUTPUT
echo "$payload" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "Files to sync:"
cat changed-files.txt

- name: Trigger Agentuity Sync Agent
- name: Build and send payload
env:
AGENTUITY_TOKEN: ${{ secrets.AGENTUITY_TOKEN }}
run: |
echo "Sending payload to agent:"
echo '${{ steps.files.outputs.payload }}' | jq '.'

curl https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6 \
-X POST \
-H "Authorization: Bearer $AGENTUITY_TOKEN" \
-H "Content-Type: application/json" \
-d '${{ steps.files.outputs.payload }}'
set -euo pipefail
cat changed-files.txt | \
./bin/build-payload.sh "${{ github.repository }}" incremental | \
./bin/send-webhook.sh "https://agentuity.ai/webhook/f61d5ce9d6ed85695cc992c55ccdc2a6" "Bearer $AGENTUITY_TOKEN"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Secure webhook URL and add newline at EOF.
Replace the hard-coded endpoint with a GitHub secret and append a newline:

-            ./bin/send-webhook.sh "https://agentuity.ai/webhook/…"
+            ./bin/send-webhook.sh "${{ secrets.SYNC_DOCS_WEBHOOK_URL }}"

+# (ensure newline at end of file)

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 YAMLlint (1.37.1)

[error] 35-35: no new line character at the end of file

(new-line-at-end-of-file)

🤖 Prompt for AI Agents
In .github/workflows/sync-docs.yml at line 35, the webhook URL is hard-coded,
which is insecure. Replace the URL string with a reference to a GitHub secret
(e.g., ${{ secrets.WEBHOOK_URL }}) to secure the endpoint. Also, ensure the file
ends with a newline character to comply with POSIX standards.

3 changes: 3 additions & 0 deletions agent-docs/agentuity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ agents:
name: doc-processing
# The description of the Agent which is editable
description: An application that processes documents
- id: agent_9ccc5545e93644bd9d7954e632a55a61
name: doc-qa
description: Agent that can answer questions based on dev docs as the knowledge base
7 changes: 4 additions & 3 deletions agent-docs/src/agents/doc-processing/docs-orchestrator.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { AgentContext } from '@agentuity/sdk';
import { processDoc } from './docs-processor';
import { VECTOR_STORE_NAME } from './config';
import type { FilePayload, SyncPayload, SyncStats } from './types';
import { VECTOR_STORE_NAME } from '../../../../config';
import type { SyncPayload, SyncStats } from './types';

/**
* Helper to remove all vectors for a given logical path from the vector store.
Expand Down Expand Up @@ -76,7 +76,8 @@ export async function syncDocsFromPayload(ctx: AgentContext, payload: SyncPayloa
...chunk.metadata,
path: logicalPath,
};
await ctx.vector.upsert(VECTOR_STORE_NAME, chunk);
const result = await ctx.vector.upsert(VECTOR_STORE_NAME, chunk);
ctx.logger.info('Upserted chunk: %o', result.length);
}

processed++;
Expand Down
10 changes: 1 addition & 9 deletions agent-docs/src/agents/doc-processing/docs-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,8 @@ import type { VectorUpsertParams } from '@agentuity/sdk';
import { chunkAndEnrichDoc } from './chunk-mdx';
import { embedChunks } from './embed-chunks';
import type { Chunk } from './chunk-mdx';
import type { ChunkMetadata } from './types';

export type ChunkMetadata = {
chunkIndex: number;
contentType: string;
heading: string;
title: string;
description: string;
text: string;
createdAt: string;
};

/**
* Processes a single .mdx doc: loads, chunks, and enriches each chunk with metadata.
Expand Down
2 changes: 1 addition & 1 deletion agent-docs/src/agents/doc-processing/embed-chunks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ export async function embedChunks(
if (!response.embeddings || response.embeddings.length !== texts.length) {
throw new Error('Embedding API returned unexpected result.');
}
}

return response.embeddings;
}
2 changes: 1 addition & 1 deletion agent-docs/src/agents/doc-processing/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk';
import { syncDocsFromPayload } from './docs-orchestrator';
import type { FilePayload, SyncPayload } from './types';
import type { SyncPayload } from './types';

export const welcome = () => {
return {
Expand Down
13 changes: 12 additions & 1 deletion agent-docs/src/agents/doc-processing/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,15 @@ export interface SyncStats {
deleted: number;
errors: number;
errorFiles: string[];
}
}

/**
 * Metadata stored alongside each embedded chunk in the vector store.
 * Written at upsert time by the doc-processing agent and read back by
 * the doc-qa agent when reconstructing documents.
 */
export type ChunkMetadata = {
// Position of the chunk within its source document; used to re-order
// chunks when reassembling the full text.
chunkIndex: number;
// Kind of content in the chunk (e.g. prose vs. code) — assumed from the
// name; TODO confirm against chunk-mdx producer.
contentType: string;
// Nearest heading the chunk falls under.
heading: string;
// Title of the source document.
title: string;
// Description of the source document.
description: string;
// Raw chunk text; concatenated with sibling chunks to rebuild the doc.
text: string;
// Timestamp when the chunk was created/indexed.
createdAt: string;
// Logical document path; optional because it is attached during upsert.
path?: string;
};
122 changes: 122 additions & 0 deletions agent-docs/src/agents/doc-qa/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import type { AgentContext, AgentRequest, AgentResponse } from '@agentuity/sdk';
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';

import type { ChunkMetadata } from '../doc-processing/types';
import { VECTOR_STORE_NAME, vectorSearchNumber } from '../../../../config';
import type { RelevantDoc } from './types';

export default async function Agent(
req: AgentRequest,
resp: AgentResponse,
ctx: AgentContext
) {
const prompt = await req.data.text();
const relevantDocs = await retrieveRelevantDocs(ctx, prompt);

const systemPrompt = `
You are a developer documentation assistant. Your job is to answer user questions about the Agentuity platform as effectively and concisely as possible, adapting your style to the user's request. If the user asks for a direct answer, provide it without extra explanation. If they want an explanation, provide a clear and concise one. Use only the provided relevant documents to answer.
You must not make up answers if the provided documents don't exist. You can be direct to the user that the documentations
don't seem to include what they are looking for. Lying to the user is prohibited as it only slows them down. Feel free to
suggest follow up questions if what they're asking for don't seem to have an answer in the document. You can provide them
a few related things that the documents contain that may interest them.
For every answer, return a valid JSON object with:
1. "answer": your answer to the user's question.
2. "documents": an array of strings, representing the path of the documents you used to answer.
If you use information from a document, include it in the "documents" array. If you do not use any documents, return an empty array for "documents".
User question:
\`\`\`
${prompt}
\`\`\`
Relevant documents:
${JSON.stringify(relevantDocs, null, 2)}
Respond ONLY with a valid JSON object as described above. In your answer, you should format code blocks properly in Markdown style if the user needs answer in code block.
`.trim();

const llmResponse = await streamText({
model: openai('gpt-4o'),
system: systemPrompt,
prompt: prompt,
maxTokens: 2048,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This model's size is 128,000 tokens - why so low here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah I'll extend the size in the next PR. Mainly, I wanted to keep the generation concise and short for now. Will push its limit.

});

return resp.stream(llmResponse.textStream);
}

/**
 * Finds documents relevant to a user prompt via vector search.
 *
 * Searches the vector store with the raw prompt, de-duplicates the logical
 * document paths found in the result metadata, then loads the full text of
 * each unique document in parallel. Returns an empty array on any failure.
 *
 * @param ctx    Agent context providing the vector store and logger.
 * @param prompt Free-form user question used as the search query.
 * @returns One { path, content } entry per unique matching document.
 */
async function retrieveRelevantDocs(ctx: AgentContext, prompt: string): Promise<RelevantDoc[]> {
  const searchParams = {
    query: prompt,
    limit: vectorSearchNumber
  }
  try {
    const matches = await ctx.vector.search(VECTOR_STORE_NAME, searchParams);

    // Collect each distinct logical path, warning on malformed metadata.
    const seenPaths = new Set<string>();
    for (const match of matches) {
      if (!match.metadata) {
        ctx.logger.warn('Vector missing metadata');
        continue;
      }
      const docPath = typeof match.metadata.path === 'string' ? match.metadata.path : undefined;
      if (!docPath) {
        ctx.logger.warn('Vector metadata path is not a string');
        continue;
      }
      seenPaths.add(docPath);
    }

    // Fetch the full text of every unique document concurrently.
    return await Promise.all(
      [...seenPaths].map(async docPath => ({
        path: docPath,
        content: await retrieveDocumentBasedOnPath(ctx, docPath)
      }))
    );
  } catch (err) {
    ctx.logger.error('Error retrieving relevant docs: %o', err);
    return [];
  }
}

/**
 * Reconstructs the full text of a document from its stored chunks.
 *
 * Queries the vector store for chunks whose metadata.path matches the given
 * logical path, orders them by chunkIndex, and joins their text with blank
 * lines. Returns an empty string if the search itself fails.
 *
 * NOTE(review): the single-space query with a high limit is presumably a
 * metadata-filtered "fetch everything" against the store — confirm the
 * Agentuity vector API supports this pattern.
 *
 * @param ctx  Agent context providing the vector store and logger.
 * @param path Logical document path stored in chunk metadata.
 * @returns The concatenated document text, or '' on search error.
 */
async function retrieveDocumentBasedOnPath(ctx: AgentContext, path: string): Promise<string> {
  const dbQuery = {
    query: ' ',
    limit: 10000,
    metadata: {
      path: path
    }
  }
  try {
    const vectors = await ctx.vector.search(VECTOR_STORE_NAME, dbQuery);

    // Skip malformed chunks (missing metadata or a non-numeric chunkIndex)
    // instead of letting a TypeError on `metadata.chunkIndex` bounce to the
    // catch and discard the whole document; this mirrors the per-vector
    // metadata validation done in retrieveRelevantDocs.
    const orderedChunks = vectors
      .flatMap(vec => {
        const metadata = vec.metadata as ChunkMetadata | undefined;
        if (!metadata || typeof metadata.chunkIndex !== 'number') {
          ctx.logger.warn('Skipping chunk with invalid metadata for path %s', path);
          return [];
        }
        return [{ index: metadata.chunkIndex, text: metadata.text }];
      })
      .sort((a, b) => a.index - b.index);

    return orderedChunks.map(chunk => chunk.text).join('\n\n');
  } catch (err) {
    ctx.logger.error('Error retrieving document by path %s: %o', path, err);
    return '';
  }
}
5 changes: 5 additions & 0 deletions agent-docs/src/agents/doc-qa/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * A document surfaced by vector search and handed to the LLM as context.
 */
export interface RelevantDoc {
// Logical path of the document (taken from chunk metadata).
path: string;
// Full reconstructed text of the document.
content: string;
}

Loading