From 08dc19ff9bc2f37034f2d695dd8853ab50620232 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 20 Sep 2025 19:03:35 +0000 Subject: [PATCH 1/2] Empty PR 3 - minimal comment Co-Authored-By: srith@agentuity.com --- next.config.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/next.config.mjs b/next.config.mjs index 5c5c9c75..3e9bc8f7 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -1,5 +1,6 @@ import { initOpenNextCloudflareForDev } from '@opennextjs/cloudflare'; import { createMDX } from 'fumadocs-mdx/next'; +// empty pr 3 comment const withMDX = createMDX(); From d4b1b5467194be80f3f06b640e10c0b27b4f4812 Mon Sep 17 00:00:00 2001 From: afterrburn Date: Sat, 20 Sep 2025 13:24:26 -0600 Subject: [PATCH 2/2] add doc-qa changes --- agent-docs/RAG-TODO.md | 54 ------ agent-docs/RAG-design.md | 220 ------------------------- agent-docs/RAG-user-stories.md | 116 ------------- agent-docs/README.md | 6 +- agent-docs/src/agents/doc-qa/prompt.ts | 1 - agent-docs/src/agents/doc-qa/rag.ts | 7 +- 6 files changed, 8 insertions(+), 396 deletions(-) delete mode 100644 agent-docs/RAG-TODO.md delete mode 100644 agent-docs/RAG-design.md delete mode 100644 agent-docs/RAG-user-stories.md diff --git a/agent-docs/RAG-TODO.md b/agent-docs/RAG-TODO.md deleted file mode 100644 index beb72cf3..00000000 --- a/agent-docs/RAG-TODO.md +++ /dev/null @@ -1,54 +0,0 @@ -# RAG System Implementation TODOs - -## 1. Document Chunking & Metadata -- [x] Refine and test the chunking logic for MDX files. -- [x] Implement full metadata enrichment (id, path, chunkIndex, contentType, heading, keywords) in the chunking/processing pipeline. -- [x] Write unit tests for chunking and metadata extraction. - -## 2. Keyword Extraction -- [x] Implement LLM-based keyword extraction for each chunk. -- [x] Write tests to validate keyword extraction quality. -- [ ] Integrate keyword in document processing pipeline - -## 3. Embedding Generation -- [x] Implement embedding function for batch processing of chunk texts (using OpenAI SDK or Agentuity vector store as appropriate). -- [x] Integrate embedding generation into the chunk processing pipeline. -- [ ] Write tests to ensure embeddings are generated and stored correctly. - -## 4. Vector Store Integration -- [x] Set up Agentuity vector database integration. -- [x] Store chunk content, metadata, keywords, and embeddings. - -## 5. Hybrid Retrieval Logic -- [ ] Implement hybrid search (semantic + keyword boosting). -- [ ] Write tests to ensure correct ranking and recall. - -## 6. Reranker Integration -- [ ] Integrate reranker model (API or local). -- [ ] Implement reranking step after hybrid retrieval. -- [ ] Write tests to validate reranker improves result quality. - -## 7. API Layer -- [ ] Build modular API endpoints for search and retrieval. -- [ ] Ensure endpoints are stateless and testable. -- [ ] Write API tests (unit and integration). - -## 8. UI Integration -- [ ] Add search bar and results display to documentation site. -- [ ] Implement keyword highlighting and breadcrumb navigation. -- [ ] Write UI tests for search and result presentation. - -## 9. Monitoring & Analytics -- [ ] Add logging for search queries and result quality. -- [ ] Implement feedback mechanism for users to rate results. - -## 10. Documentation & Developer Experience -- [ ] Document each module and its tests. -- [ ] Provide clear setup and usage instructions. - -## 11. 
Sync/Processor Workflow Design -- [x] Design the documentation sync workflow: - - [x] Primary: Trigger sync via CI/CD or GitHub Action after merges to main/deploy branch. - - [x] Optional: Implement a webhook endpoint for manual or CMS-triggered syncs. - - [x] Ensure the sync process is idempotent and efficient (only updates changed docs/chunks). - - [x] Plan for operational workflow implementation after core modules are complete. diff --git a/agent-docs/RAG-design.md b/agent-docs/RAG-design.md deleted file mode 100644 index 7e350054..00000000 --- a/agent-docs/RAG-design.md +++ /dev/null @@ -1,220 +0,0 @@ -# Documentation RAG System - Design Document - -## 1. System Overview -A Retrieval-Augmented Generation (RAG) system for Agentuity's documentation, enabling users to search for relevant documentation pages, get direct answers, and discover code examples efficiently. - ---- - -## 2. Document Chunking & Metadata - -### 2.1 Chunking -- Documents (MDX files) are split into semantically meaningful chunks (steps, paragraphs, code blocks, etc.) using custom logic. -- Each chunk is enriched with metadata for effective retrieval and navigation. - -### 2.2 Metadata Structure -```typescript -interface DocumentMetadata { - id: string; - path: string; - chunkIndex: number; - contentType: string; - heading?: string; - keywords?: string; -} -``` - -#### Field Rationale & Use Cases -- **id**: Unique retrieval, deduplication, updates. -- **path**: Navigation, linking, analytics. -- **chunkIndex**: Context window, document flow. -- **contentType**: Result presentation, filtering. -- **keywords**: Hybrid search, filtering, boosting, related content, highlighting. - ---- - -## 3. [Optional] Keyword Extraction -- **Purpose:** Boost search accuracy, enable hybrid search, support filtering, and improve UI. -- **Approach:** - - Start with simple extraction (headings, code, links, bolded text). - - For best results, use an LLM (e.g., GPT-4o) to extract 5-10 important keywords/phrases per chunk. - - Store keywords as a separate field in the metadata. - ---- - -## 4. Embedding Generation -- **Only the main content of each chunk is embedded** (not keywords or metadata). -- Use a dedicated embedding model (e.g., OpenAI's `text-embedding-3-small`). -- Store the resulting vector alongside the chunk's metadata and keywords. - ---- - -## 5. Vector Store -- Use Agentuity built in Vector storage -- Store for each chunk: - - Embedding vector - - Main content - - Metadata (id, path, chunkIndex, contentType, heading) - - Keywords - ---- - -## 6. Retrieval & Hybrid Search -- **User query flow:** - 1. Embed the user query. - 2. Search for similar vectors (semantic search). - 3. Check for keyword matches in the `keywords` field. - 4. Combine results (hybrid search), boosting those with both high semantic similarity and keyword matches. - 5. Use metadata for context and navigation in the UI. - -- **Why not embed keywords/metadata?** - - Embedding only the main content ensures high-quality semantic search. - - Keywords/metadata are used for filtering, boosting, and UI, not for semantic similarity. - ---- - -## 7. [Optional] Keyword Boosting and Highlighting - -### 7.1 Keyword Boosting in Retrieval - -**Definition:** Boosting means giving extra weight to chunks that contain keywords matching the user's query, so they appear higher in the search results—even if their semantic similarity score is not the highest. - -**How It Works:** -- When a user submits a query: - 1. 
**Semantic Search:** Embed the query and retrieve the top-N most similar chunks from the vector store.
  2. **Keyword Match:** Check which of these chunks have keywords that match (exactly or fuzzily) terms in the user's query.
  3. **Score Adjustment:** Increase the score (or ranking) of chunks with keyword matches. Optionally, also include chunks that have strong keyword matches but were not in the top-N semantic results.
  4. **Hybrid Ranking:** Combine the semantic similarity score and the keyword match score to produce a final ranking.

**Technical Example:**
- For each chunk, compute:
  `final_score = semantic_score + (keyword_match ? boost_value : 0)`
- Tune `boost_value` based on how much you want to favor keyword matches.

**Why?**
- Ensures that highly relevant technical results (e.g., containing exact function names, CLI commands, or jargon) are not missed by the embedding model.
- Improves recall for precise, technical queries.

---

### 7.2 Keyword Highlighting in the UI

**Definition:** Highlighting means visually emphasizing the keywords in the search results that match the user's query, making it easier for users to spot why a result is relevant.

**How It Works:**
- When displaying a result chunk:
  1. Compare the user's query terms to the chunk's keywords.
  2. In the displayed snippet, bold or color the matching keywords.
  3. Optionally, also highlight those keywords in the context of the chunk's content.

**User Experience Example:**
- User searches for: `install CLI on Linux`
- Result snippet:
  ```
  The **Agentuity CLI** is a cross-platform command-line tool for working with Agentuity Cloud. It supports **Windows** (using WSL), **MacOS**, and **Linux**.
  ```
- The keywords "Agentuity CLI" and "Linux" are highlighted, helping the user quickly see the match.

**Why?**
- Increases user trust in the search system by making relevance transparent.
- Helps users scan results faster, especially in technical documentation with dense information.

---

### 7.3 Summary Table

| Feature | Purpose | Technical Step | User Benefit |
|--------------|-------------------------------------|---------------------------------------|-------------------------------------|
| Boosting | Improve ranking of keyword matches | Adjust score/rank in retrieval | More relevant results at the top |
| Highlighting | Make matches visible in the UI | Bold/color keywords in result display | Easier, faster result comprehension |

---

### 7.4 Optional Enhancements
- Allow users to filter results by keyword/facet.
- Show a "Why this result?" tooltip listing matched keywords.

---

## 8. Reranker Integration

### 8.1 What is a Reranker?
A reranker is a model (often a cross-encoder or LLM) that takes a set of candidate results (retrieved by semantic/keyword/hybrid search) and scores them for relevance to the user's query, often with much higher accuracy than the initial retrieval.

### 8.2 Where Does It Fit?
- The reranker is applied **after** the hybrid retrieval (semantic + keyword boosting) step.
- It takes the top-N candidate chunks and the user query, and produces a new, more accurate ranking.
- The final answer is generated from the top-N chunks after reranking.

### 8.3 Retrieval Pipeline with Reranker

1. **User Query**
2. **Hybrid Retrieval** (semantic + keyword search, with boosting)
3. **Top-N Candidates**
4. **Reranker Model** (scores each candidate for true relevance)
5. **Final Generated Answer** (displayed to user)

### 8.4 Example Models
- OpenAI GPT-4o or GPT-3.5-turbo (with a ranking prompt)
- Cohere Rerank API
- bge-reranker (open-source, HuggingFace)
- ColBERT, MonoT5, or other cross-encoders

### 8.5 Benefits
- **Higher Precision:** Deeply understands context and technical terms.
- **Better Handling of Ambiguity:** Picks the best answer among similar candidates.
- **Improved User Trust:** More relevant answers at the top.

### 8.6 Why Keep Keyword Search?
- Keyword search ensures exact matches for technical terms are not missed.
- Hybrid search provides the reranker with the best possible candidate set.
- Removing keyword search would reduce recall and technical accuracy.

### 8.7 Updated Retrieval Flow Diagram

```mermaid
graph TD
    A[User Query] --> B["Hybrid Retriever (Embeddings + Keywords)"]
    B --> C[Top-N Candidates]
    C --> D[Reranker Model]
    D --> E[Final Answer]
```

---

## 9. UI Integration
- Add a search bar and results display to the documentation site.
- Show direct answers, code snippets, and links to full docs, with keyword highlighting and breadcrumb navigation.

---

## 10. Technology Stack
| Step | Technology/Tool | Notes |
|---------------------|-------------------------------|----------------------------------------------------|
| Chunking | TypeScript logic | `chunk-mdx.ts` |
| Keyword Extraction | LLM (GPT-4o, GPT-3.5-turbo) | API call per chunk; can batch for efficiency |
| Embedding | OpenAI Embedding API | `text-embedding-3-small` or similar |
| Vector Store | pgvector, Pinecone, Weaviate | Choose based on infra preference |
| Retrieval API | Next.js API route | Combines vector and keyword search |
| UI | Next.js/React | Search bar, results, highlighting, navigation |

---

## 11. Example Metadata for a Chunk
```json
{
  "id": "introduction-getting-started-1",
  "path": "/introduction/getting-started",
  "chunkIndex": 1,
  "contentType": "step",
  "heading": "Install the CLI",
  "keywords": "Agentuity CLI, CLI installation, command-line tool, cross-platform, Windows, WSL, MacOS, Linux, curl, installation"
}
```

---

## 12. Summary
- Only main content is embedded; keywords and metadata are stored separately.
- Hybrid search (semantic + keyword) provides the best retrieval experience.
- Metadata supports navigation, filtering, and UI context.
- LLM-powered keyword extraction is recommended for technical accuracy.
\ No newline at end of file
diff --git a/agent-docs/RAG-user-stories.md b/agent-docs/RAG-user-stories.md
deleted file mode 100644
index 237eb632..00000000
--- a/agent-docs/RAG-user-stories.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# Documentation RAG System - User Stories
-
-## Core User Stories
-
-### 1. Quick Answer Search
-**As a** developer using Agentuity's documentation
-**I want to** get quick, accurate answers to my specific questions
-**So that** I can solve problems without reading through entire documentation pages
-
-**Example:**
-- "How do I implement streaming responses with OpenAI models?"
-- "What's the difference between Agent and AgentRequest?"
-- "How do I handle errors in my agent?"
-
-### 2. Documentation Navigation
-**As a** developer exploring Agentuity's documentation
-**I want to** find relevant documentation pages based on my topic of interest
-**So that** I can learn about features and concepts in a structured way
-
-**Example:**
-- "Show me pages about authentication"
-- "Where can I learn about agent templates?"
-- "Find documentation about error handling" - -### 3. Code Example Discovery -**As a** developer implementing Agentuity features -**I want to** find relevant code examples quickly -**So that** I can understand how to implement specific functionality - -**Example:** -- "Show me examples of implementing custom tools" -- "How do I structure an agent response?" -- "Find code samples for error handling" - -## User Experience Flows - -### Flow 1: Direct Answer Search -1. User types a specific question in the search bar -2. System returns: - - Direct answer to the question - - Relevant code snippet (if applicable) - - Link to the full documentation page - - Related topics/pages - -### Flow 2: Topic Exploration -1. User searches for a general topic -2. System returns: - - List of relevant documentation pages - - Brief context for each page - - Hierarchical navigation (breadcrumbs) - - Related topics - -### Flow 3: Code Example Search -1. User searches for implementation examples -2. System returns: - - Relevant code snippets - - Context for each example - - Link to full documentation - - Related examples - -## Success Criteria - -### For Quick Answers -- Answers are accurate and up-to-date -- Responses include relevant code snippets when applicable -- Links to full documentation are provided -- Related topics are suggested - -### For Documentation Navigation -- Search results are well-organized -- Breadcrumb navigation is clear -- Related topics are logically connected -- Results are ranked by relevance - -### For Code Examples -- Code snippets are complete and runnable -- Examples include necessary context -- Links to full documentation are provided -- Related examples are suggested - -## Edge Cases to Consider - -1. **Ambiguous Queries** - - User asks a question that could relate to multiple topics - - System should provide disambiguation options - -2. **Out-of-Scope Questions** - - User asks about features not in the documentation - - System should clearly indicate what's not covered - -3. **Technical Depth** - - User might need different levels of technical detail - - System should provide both high-level and detailed answers - -4. **Version-Specific Information** - - User might be using a specific version - - System should indicate version compatibility - -## User Interface Considerations - -### Search Interface -- Global search bar in documentation header -- Clear indication of search scope -- Quick filters for content types (All, Code, Guides, etc.) - -### Results Display -- Clear distinction between direct answers and page references -- Code snippets with syntax highlighting -- Breadcrumb navigation -- Related topics section - -### Navigation -- Easy way to refine search -- Clear path to full documentation -- Related topics suggestions -- History of recent searches \ No newline at end of file diff --git a/agent-docs/README.md b/agent-docs/README.md index 27ba6522..140f6eee 100644 --- a/agent-docs/README.md +++ b/agent-docs/README.md @@ -42,8 +42,12 @@ Follow the interactive prompts to configure your agent. 
### Development Mode

-Run your project in development mode with:
+Make sure bun packages are properly installed:
+```bash
+bun install
+```
+Run your project in development mode with:
```bash
agentuity dev
```
diff --git a/agent-docs/src/agents/doc-qa/prompt.ts b/agent-docs/src/agents/doc-qa/prompt.ts
index cec823fc..24aa5f01 100644
--- a/agent-docs/src/agents/doc-qa/prompt.ts
+++ b/agent-docs/src/agents/doc-qa/prompt.ts
@@ -55,7 +55,6 @@ Return ONLY the query text, nothing else.`;
  });
  const rephrasedQuery = result.text?.trim() || input;
-  console.log(rephrasedQuery);
  // Log if we actually rephrased it
  if (rephrasedQuery !== input) {
    ctx.logger.info(
diff --git a/agent-docs/src/agents/doc-qa/rag.ts b/agent-docs/src/agents/doc-qa/rag.ts
index 3140fd26..5e13215e 100644
--- a/agent-docs/src/agents/doc-qa/rag.ts
+++ b/agent-docs/src/agents/doc-qa/rag.ts
@@ -25,8 +25,7 @@ Your role is to be as helpful as possible and try to assist user by answering th
 === RULES ===
 1. Use ONLY the content inside tags to craft your reply. If the required information is missing, state that the docs do not cover it.
 2. Never fabricate or guess undocumented details.
-3. Focus on answering the QUESTION with the available provided to you. Keep in mind some might not be relevant,
-   so pick the ones that are relevant to the user's question.
+3. Focus on answering the QUESTION with the available provided to you. Keep in mind some might not be relevant, so pick the ones that are relevant to the user's question.
 4. Ambiguity handling:
   • When contains more than one distinct workflow or context that could satisfy the question, do **not** choose for the user.
   • Briefly (≤ 2 sentences each) summarise each plausible interpretation and ask **one** clarifying question so the user can pick a path.
@@ -41,8 +40,8 @@ Your role is to be as helpful as possible and try to assist user by answering th
   • Use **bold** for important terms and *italic* for emphasis when appropriate.
   • Use > blockquotes for important notes or warnings.
 6. You may suggest concise follow-up questions or related topics that are present in .
-7. Keep a neutral, factual tone.
-
+7. If do not answer the question, state that explicitly and offer the closest documented topic; answer strictly from or ask one clarifying question if nothing related exists.
+8. Keep a neutral, factual tone.
 === OUTPUT FORMAT ===
 Return **valid JSON only** matching this TypeScript type: