Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ Rapid assessment and retrieval from knowledge graph using Neo4j GraphRAG.

## Overview

Scouter is a knowledge graph-based document retrieval system that:
Scouter is a knowledge graph-based document retrieval system focused on agentic search via MCP (Model Context Protocol). It:

- Ingests PDFs and text documents using Neo4j GraphRAG's SimpleKGPipeline
- Provides fast semantic search with relevance scoring
- Supports both API and MCP (Model Context Protocol) interfaces
- Provides agentic semantic search via MCP for LLM integration
- Includes a REST API for document ingestion
- Includes an evaluation framework for retrieval quality assessment

## Quick Start
Expand Down Expand Up @@ -62,31 +62,25 @@ curl -X POST "http://localhost:8000/v1/ingest" \
-d '{"text": "Your document content", "metadata": {"source": "api"}}'
```

### Search

```bash
# Search documents
curl "http://localhost:8000/v1/search?query=your%20search%20term&limit=5"
```

### Interactive API

Visit <http://localhost:8000/docs> for interactive API documentation.

**Note:** Search functionality is provided via MCP (Model Context Protocol) for agentic retrieval. A direct REST search API is not available.

## Architecture

### Components

- **Ingestion Service**: Processes PDFs/text into knowledge graph using SimpleKGPipeline
- **Search Service**: Performs semantic search with relevance scoring
- **MCP Server**: Provides Model Context Protocol interface for LLM integration
- **MCP Server**: Core component providing agentic search via Model Context Protocol for LLM integration
- **Celery Workers**: Handle async document processing
- **Redis**: Task queue and caching

### Data Flow

1. Documents → Ingestion API → Celery Queue → Neo4j GraphRAG
2. Search Query → Search API → Neo4j → Ranked Results
2. Search Query → MCP Server → Agentic Search → Neo4j → Ranked Results

## Development

Expand Down Expand Up @@ -143,24 +137,26 @@ The project uses Neo4j with APOC plugin for enhanced graph procedures. Docker se

## Examples

### RAG Chatbot
### MCP Integration (Primary Use Case)

```bash
cd examples/chatbot
python chatbot.py
# Start MCP server
python -m scouter_app.agent.mcp

# Use with Claude Desktop or other MCP-compatible tools
```

Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation.
Scouter's MCP server enables agentic search for LLMs, providing semantic retrieval from the knowledge graph.

### MCP Integration
### RAG Chatbot

```bash
# Start MCP server
python -m scouter_app.agent.mcp

# Use with Claude Desktop or other MCP-compatible tools
cd examples/chatbot
python chatbot.py
```

Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation.

## Project Structure

```
Expand Down
6 changes: 3 additions & 3 deletions app_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from fastapi import FastAPI

from src.scouter_app.agent.mcp import app as mcp_app
from src.scouter_app.config.llm import get_client_config
from src.scouter_app.ingestion.api import router as ingestion_router
from src.scouter.agent.mcp import app as mcp_app
from src.scouter.config.llm import get_client_config
from src.scouter.ingestion.api import router as ingestion_router

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion evals/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import pytest

from scouter_app.ingestion.service import IngestionService
from scouter.ingestion.service import IngestionService

from .utils import create_light_subset

Expand Down
1 change: 1 addition & 0 deletions evals/test_retrieval_relevancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from deepeval.test_case import LLMTestCase

from examples.chatbot.chatbot import chat_with_rag

from .utils import OpenRouterLLM

THRESHOLD = 0.5
Expand Down
12 changes: 5 additions & 7 deletions examples/chatbot/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from mcp import ClientSession
from mcp.client.stdio import StdioServerParameters, stdio_client

from scouter_app.config.llm import (
from scouter.config.llm import (
DEFAULT_MODEL,
call_with_rate_limit,
get_chatbot_client,
Expand All @@ -32,8 +32,6 @@ async def chat_with_rag(query: str) -> str:

mcp_tools = await session.list_tools()

print(mcp_tools)

# Convert MCP tools to OpenAI format
openai_tools = [
{
Expand Down Expand Up @@ -72,15 +70,15 @@ async def chat_with_rag(query: str) -> str:
tool_args = json.loads(tool_call.function.arguments)
result = await session.call_tool(tool_name, tool_args)
# Add to messages
messages.append( # type: ignore
{"role": "assistant", "content": "", "tool_calls": [tool_call]} # type: ignore
messages.append( # type: ignore[PGH003]
{"role": "assistant", "content": "", "tool_calls": [tool_call]} # type: ignore[PGH003]
)
messages.append( # type: ignore
messages.append( # type: ignore[PGH003]
{
"role": "tool",
"content": str(result),
"tool_call_id": tool_call.id,
} # type: ignore
} # type: ignore[PGH003]
)

# Call LLM again with updated messages
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ asyncio_mode = "auto"
[tool.ruff]
extend = "ruff.toml"

[tool.hatch.build.targets.wheel]
packages = ["src/scouter"]

[dependency-groups]
dev = [
"pre-commit>=4.5.0",
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/scouter_app/agent/agent.py → src/scouter/agent/agent.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json

from scouter_app.agent.tools import get_tools
from scouter_app.config.llm import (
from scouter.agent.tools import get_tools
from scouter.config.llm import (
DEFAULT_MODEL,
call_with_rate_limit,
get_scouter_client,
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions src/scouter_app/agent/tools.py → src/scouter/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from neo4j_graphrag.retrievers import VectorRetriever
from pydantic import BaseModel, Field

from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder
from scouter_app.shared.domain_models import VectorSearchResult
from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder
from scouter.shared.domain_models import VectorSearchResult


class SemanticSearchParams(BaseModel):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_neo4j_embedder() -> SentenceTransformerEmbeddings:
def call_with_rate_limit(client: openai.OpenAI, **kwargs):
"""Call OpenAI client with rate limit handling."""
max_retries = 5
for attempt in range(max_retries): # noqa: PERF203
for attempt in range(max_retries):
try:
return client.chat.completions.create(**kwargs)
except openai.RateLimitError: # noqa: PERF203
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

from fastapi import APIRouter, Form, UploadFile

from scouter_app.config.llm import get_client_config
from scouter_app.ingestion.tasks import process_document_task
from scouter_app.shared.domain_models import IngestResponse
from scouter.config.llm import get_client_config
from scouter.ingestion.tasks import process_document_task
from scouter.shared.domain_models import IngestResponse

router = APIRouter()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm
from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm


class IngestionService:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

from celery import Celery

from scouter_app.ingestion.service import IngestionService
from scouter.ingestion.service import IngestionService

app = Celery(
"scouter_app.ingestion.tasks",
"scouter.ingestion.tasks",
broker=os.getenv("REDIS_URL", "redis://localhost:6379/0"),
)

Expand Down
53 changes: 53 additions & 0 deletions src/scouter/llmcore/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from .agent import AgentRun, run_agent
from .client import ChatCompletionOptions, LLMConfig, call_llm, create_llm_client
from .exceptions import AgentError, LLMError, ToolExecutionError
from .tools import (
Tool,
create_tool,
execute_tool,
lookup_tool,
register_tool,
run_tool,
tool,
)
from .types import (
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionMessage,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionSystemMessageParam,
ChatCompletionToolMessageParam,
ChatCompletionToolParam,
ChatCompletionUserMessageParam,
)
from .utils import retry_loop

# Public API of the package: every name listed here is re-exported from one of
# the sibling submodules imported above (.agent, .client, .exceptions, .tools,
# .types, .utils). Entries are in sorted order, with capitalized names
# (classes and type aliases) ahead of lowercase function names.
__all__ = [
"AgentError",
"AgentRun",
"ChatCompletion",
"ChatCompletionAssistantMessageParam",
"ChatCompletionMessage",
"ChatCompletionMessageParam",
"ChatCompletionMessageToolCall",
"ChatCompletionOptions",
"ChatCompletionSystemMessageParam",
"ChatCompletionToolMessageParam",
"ChatCompletionToolParam",
"ChatCompletionUserMessageParam",
"LLMConfig",
"LLMError",
"Tool",
"ToolExecutionError",
"call_llm",
"create_llm_client",
"create_tool",
"execute_tool",
"lookup_tool",
"register_tool",
"retry_loop",
"run_agent",
"run_tool",
"tool",
]
Loading
Loading