From 9978894969acee586221ddd8df1bc45236c92c6a Mon Sep 17 00:00:00 2001 From: farhoud Date: Mon, 8 Dec 2025 19:57:38 +0330 Subject: [PATCH 1/8] Remove old scouter_app module structure --- src/scouter_app/__init__.py | 0 src/scouter_app/agent/__init__.py | 0 src/scouter_app/agent/agent.py | 69 --------------- src/scouter_app/agent/mcp.py | 27 ------ src/scouter_app/agent/tools.py | 53 ------------ src/scouter_app/config/__init__.py | 0 src/scouter_app/config/llm.py | 107 ------------------------ src/scouter_app/ingestion/__init__.py | 0 src/scouter_app/ingestion/api.py | 59 ------------- src/scouter_app/ingestion/service.py | 67 --------------- src/scouter_app/ingestion/tasks.py | 51 ----------- src/scouter_app/shared/__init__.py | 0 src/scouter_app/shared/domain_models.py | 58 ------------- 13 files changed, 491 deletions(-) delete mode 100644 src/scouter_app/__init__.py delete mode 100644 src/scouter_app/agent/__init__.py delete mode 100644 src/scouter_app/agent/agent.py delete mode 100644 src/scouter_app/agent/mcp.py delete mode 100644 src/scouter_app/agent/tools.py delete mode 100644 src/scouter_app/config/__init__.py delete mode 100644 src/scouter_app/config/llm.py delete mode 100644 src/scouter_app/ingestion/__init__.py delete mode 100644 src/scouter_app/ingestion/api.py delete mode 100644 src/scouter_app/ingestion/service.py delete mode 100644 src/scouter_app/ingestion/tasks.py delete mode 100644 src/scouter_app/shared/__init__.py delete mode 100644 src/scouter_app/shared/domain_models.py diff --git a/src/scouter_app/__init__.py b/src/scouter_app/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/scouter_app/agent/__init__.py b/src/scouter_app/agent/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/scouter_app/agent/agent.py b/src/scouter_app/agent/agent.py deleted file mode 100644 index 3f1addf..0000000 --- a/src/scouter_app/agent/agent.py +++ /dev/null @@ -1,69 +0,0 @@ -import json - -from scouter_app.agent.tools import get_tools -from scouter_app.config.llm import ( - DEFAULT_MODEL, - call_with_rate_limit, - get_scouter_client, -) - - -def handle_tool_calls(response, tools, client, messages): - if not response.choices[0].message.tool_calls: - return response.choices[0].message.content, messages - - # Append assistant message with tool calls - messages.append(response.choices[0].message) - - for tool_call in response.choices[0].message.tool_calls: - tool_name = tool_call.function.name - args = json.loads(tool_call.function.arguments) - for tool in tools: - if tool["function"]["name"] == tool_name: - result = tool["callable"](**args) - # Append tool result message - messages.append( - { - "role": "tool", - "tool_call_id": tool_call.id, - "content": json.dumps([r.model_dump() for r in result]) - if isinstance(result, list) - else json.dumps(result), - } - ) - break - - # Follow-up call for multi-turn - follow_up = call_with_rate_limit( - client, - model=DEFAULT_MODEL, - messages=messages, # type: ignore[arg-type] - tools=[t["function"] for t in tools], - tool_choice="auto", - ) - return handle_tool_calls(follow_up, tools, client, messages) - - -def search_agent(query: str, hints: str = ""): - client = get_scouter_client() - tools = get_tools() - openai_tools = [t["function"] for t in tools] - system_content = ( - "You are a search agent. Use the available tools to answer the user's query." 
- ) - if hints: - system_content += f"\n\nHints:\n{hints}" - messages = [ - {"role": "system", "content": system_content}, - {"role": "user", "content": query}, - ] - response = call_with_rate_limit( - client, - model=DEFAULT_MODEL, - messages=messages, # type: ignore[arg-type] - tools=openai_tools, - tool_choice="auto", - max_tokens=200, - ) - final_response, _ = handle_tool_calls(response, tools, client, messages) - return final_response diff --git a/src/scouter_app/agent/mcp.py b/src/scouter_app/agent/mcp.py deleted file mode 100644 index b499682..0000000 --- a/src/scouter_app/agent/mcp.py +++ /dev/null @@ -1,27 +0,0 @@ -from fastmcp import FastMCP - -from .agent import search_agent - -app = FastMCP("Scouter Agent") - - -@app.tool() -def search_knowledge_graph(query: str, hints: str = "") -> str: - """Search the knowledge graph for information related to the query using semantic search. - - This tool allows LLMs to retrieve relevant documents and knowledge from the Scouter knowledge graph. - It performs vector-based semantic search, returning the most relevant results. - - Args: - query: The search query string to find relevant information - hints: Optional hints to guide the search agent - - Returns: - A response string containing search results and analysis - - """ - return search_agent(query, hints) - - -if __name__ == "__main__": - app.run() diff --git a/src/scouter_app/agent/tools.py b/src/scouter_app/agent/tools.py deleted file mode 100644 index ea3cf41..0000000 --- a/src/scouter_app/agent/tools.py +++ /dev/null @@ -1,53 +0,0 @@ -import ast - -from neo4j_graphrag.retrievers import VectorRetriever -from pydantic import BaseModel, Field - -from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder -from scouter_app.shared.domain_models import VectorSearchResult - - -class SemanticSearchParams(BaseModel): - query_text: str = Field(description="exact user query") - top_k: int = Field(default=10, description="Number of results to return (1-20)") - filters: dict | None = Field(default=None, description="Optional filters") - effective_search_ratio: float = Field( - default=1.0, description="Search pool multiplier for better accuracy" - ) - - -def _get_vector_search_tool(): - retriever = VectorRetriever( - driver=get_neo4j_driver(), - index_name="chunkEmbedding", - embedder=get_neo4j_embedder(), - ) - - def semantic_search(**kwargs): - params = SemanticSearchParams(**kwargs) - raw_results = retriever.search(**params.model_dump()) - items = raw_results.items - return [ - VectorSearchResult( - node_id=data.get("id", "unknown"), - score=data.get("score", 0.0), - content=data.get("text", ""), - metadata=data, - ) - for result in items - for data in [ast.literal_eval(result.content)] - ] - - return { - "type": "function", - "function": { - "name": "semantic_search", - "description": "find relative information based on cosine similarity", - "parameters": SemanticSearchParams.model_json_schema(), - }, - "callable": semantic_search, - } - - -def get_tools(): - return [_get_vector_search_tool()] diff --git a/src/scouter_app/config/__init__.py b/src/scouter_app/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/scouter_app/config/llm.py b/src/scouter_app/config/llm.py deleted file mode 100644 index c4d8217..0000000 --- a/src/scouter_app/config/llm.py +++ /dev/null @@ -1,107 +0,0 @@ -import os -import time -from functools import lru_cache - -import openai -from neo4j_graphrag.embeddings import SentenceTransformerEmbeddings -from neo4j_graphrag.llm import 
OpenAILLM -from pydantic import model_validator -from pydantic_settings import BaseSettings - -import neo4j -from neo4j import GraphDatabase - -DEFAULT_MODEL = "openai/gpt-oss-20b:free" - - -class ClientConfig(BaseSettings): - provider: str = "openai" - api_key: str | None = None - model: str = DEFAULT_MODEL - api_base: str | None = None - temperature: float = 0.7 - max_tokens: int | None = None - env: str = "test" - - @model_validator(mode="after") - def set_provider_defaults(self): - if self.provider == "openrouter": - self.api_base = self.api_base or "https://openrouter.ai/api/v1" - self.api_key = self.api_key or os.getenv("OPENROUTER_API_KEY") - else: - self.api_key = self.api_key or os.getenv("OPENAI_API_KEY") - if not self.api_key: - msg = f"API key required for provider {self.provider}" - raise ValueError(msg) - if self.env not in ["development", "production", "test"]: - msg = "env must be one of: development, production, test" - raise ValueError(msg) - return self - - def __init__(self, **data): - super().__init__(**data) - if self.provider == "openrouter": - self.api_base = self.api_base or "https://openrouter.ai/api/v1" - self.api_key = os.getenv("OPENROUTER_API_KEY", self.api_key) - # Map model if needed, e.g., self.model = "openai/gpt-3.5-turbo" - self.env = os.getenv("SCOUTER_ENV", self.env) - - -def get_client_config(provider: str = "openai") -> ClientConfig: - return ClientConfig(provider=provider) - - -def create_client(config: ClientConfig) -> openai.OpenAI: - return openai.OpenAI( - api_key=config.api_key, - base_url=config.api_base, - max_retries=0, # Disable built-in retries to let our wrapper handle rate limits - ) - - -def get_chatbot_client() -> openai.OpenAI: - config = get_client_config("openrouter") - config.temperature = 0.9 # More creative for chatbot - return create_client(config) - - -def get_scouter_client() -> openai.OpenAI: - config = get_client_config("openrouter") - config.temperature = 0.0 # Deterministic for retrieval - return create_client(config) - - -@lru_cache(maxsize=1) -def get_neo4j_driver() -> neo4j.Driver: - uri = os.getenv("NEO4J_URI", "bolt://localhost:7687") - user = os.getenv("NEO4J_USER", "neo4j") - password = os.getenv("NEO4J_PASSWORD", "password") - return GraphDatabase.driver(uri, auth=(user, password)) - - -@lru_cache(maxsize=1) -def get_neo4j_llm() -> OpenAILLM: - config = get_client_config("openrouter") - return OpenAILLM(config.model, api_key=config.api_key, base_url=config.api_base) - - -@lru_cache(maxsize=1) -def get_neo4j_embedder() -> SentenceTransformerEmbeddings: - return SentenceTransformerEmbeddings("Qwen/Qwen3-Embedding-0.6B") - - -def call_with_rate_limit(client: openai.OpenAI, **kwargs): - """Call OpenAI client with rate limit handling.""" - max_retries = 5 - for attempt in range(max_retries): # noqa: PERF203 - try: - return client.chat.completions.create(**kwargs) - except openai.RateLimitError: # noqa: PERF203 - if attempt < max_retries - 1: - wait_time = 2**attempt # Exponential backoff - time.sleep(wait_time) - else: - raise - except Exception: - raise - return None # Unreachable diff --git a/src/scouter_app/ingestion/__init__.py b/src/scouter_app/ingestion/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/scouter_app/ingestion/api.py b/src/scouter_app/ingestion/api.py deleted file mode 100644 index 0d2a960..0000000 --- a/src/scouter_app/ingestion/api.py +++ /dev/null @@ -1,59 +0,0 @@ -"""API endpoints for document ingestion.""" - -import json -import tempfile - -from fastapi import 
APIRouter, Form, UploadFile - -from scouter_app.config.llm import get_client_config -from scouter_app.ingestion.tasks import process_document_task -from scouter_app.shared.domain_models import IngestResponse - -router = APIRouter() - - -@router.post("/v1/ingest", response_model=IngestResponse, status_code=202) -async def ingest_document( - file: UploadFile | None = None, - text: str | None = Form(None), - metadata: str = Form("{}"), -) -> IngestResponse: - """Ingest a PDF file or raw text into the knowledge graph asynchronously. - - Provide either 'file' (PDF) or 'text', not both. - - Args: - file: PDF file to ingest. - text: Raw text content to ingest. - metadata: JSON string containing metadata. - - Returns: - IngestResponse with task ID and status. - - Raises: - ValueError: If input validation fails. - """ - # Parse metadata - try: - metadata_dict = json.loads(metadata) - except json.JSONDecodeError: - metadata_dict = {} - - # Validate input - if (file is None and text is None) or (file is not None and text is not None): - msg = "Exactly one of 'file' or 'text' must be provided" - raise ValueError(msg) - - task_data = {"metadata": metadata_dict} - - if file is not None: - # Save file to temp location - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(await file.read()) - task_data["file_path"] = temp_file.name - else: - task_data["text"] = text - - config = get_client_config() - task = process_document_task.apply_async(args=[task_data]) - return IngestResponse(task_id=task.id, status="accepted", env=config.env) diff --git a/src/scouter_app/ingestion/service.py b/src/scouter_app/ingestion/service.py deleted file mode 100644 index 3d930c4..0000000 --- a/src/scouter_app/ingestion/service.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Service for ingesting documents into the knowledge graph.""" - -from typing import Any - -from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline - -from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm - - -class IngestionService: - """Service for ingesting documents into Neo4j knowledge graph.""" - - def __init__(self) -> None: - """Initialize the ingestion service with Neo4j connection.""" - self.driver = get_neo4j_driver() - self.llm = get_neo4j_llm() - self.embedder = get_neo4j_embedder() - - async def process_document( - self, - file_path: str | None = None, - text: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> dict[str, Any]: - """Process a PDF file or text into the knowledge graph using SimpleKGPipeline. - - Args: - file_path: Path to PDF file to process. - text: Text content to process. - metadata: Additional metadata for the document. - - Returns: - Dictionary containing processing status and type. - - Raises: - ValueError: If neither file_path nor text is provided. 
- """ - if metadata is None: - metadata = {} - - if file_path is None and text is None: - msg = "Either file_path or text must be provided" - raise ValueError(msg) - - try: - from_pdf = file_path is not None - kg_builder = SimpleKGPipeline( - llm=self.llm, - driver=self.driver, - embedder=self.embedder, - from_pdf=from_pdf, - ) - if from_pdf: - await kg_builder.run_async( - file_path=file_path, - document_metadata=metadata, - ) - else: - await kg_builder.run_async(text=text, document_metadata=metadata) - except OSError as e: - return {"status": "failed", "error": str(e)} - else: - return {"status": "processed", "type": "pdf" if from_pdf else "text"} - - def close(self) -> None: - """Close the Neo4j driver connection.""" - self.driver.close() diff --git a/src/scouter_app/ingestion/tasks.py b/src/scouter_app/ingestion/tasks.py deleted file mode 100644 index 5587491..0000000 --- a/src/scouter_app/ingestion/tasks.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Celery tasks for asynchronous document processing.""" - -import asyncio -import concurrent.futures -import os -from typing import Any - -from celery import Celery - -from scouter_app.ingestion.service import IngestionService - -app = Celery( - "scouter_app.ingestion.tasks", - broker=os.getenv("REDIS_URL", "redis://localhost:6379/0"), -) - - -@app.task -def process_document_task(task_data: dict[str, Any]) -> dict[str, Any]: - """Long-running task to process PDF or text into the knowledge graph. - - Args: - task_data: Dictionary containing file_path, text, and metadata. - - Returns: - Dictionary with processing result. - """ - - async def run_async() -> dict[str, Any]: - service = IngestionService() - try: - file_path = task_data.get("file_path") - text = task_data.get("text") - metadata = task_data.get("metadata", {}) - return await service.process_document( - file_path=file_path, - text=text, - metadata=metadata, - ) - finally: - service.close() - - # Run the async function in the event loop - loop = asyncio.get_event_loop() - if loop.is_running(): - # If there's already a running loop, use ThreadPoolExecutor - with concurrent.futures.ThreadPoolExecutor() as executor: - future = executor.submit(asyncio.run, run_async()) - return future.result() - else: - return asyncio.run(run_async()) diff --git a/src/scouter_app/shared/__init__.py b/src/scouter_app/shared/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/scouter_app/shared/domain_models.py b/src/scouter_app/shared/domain_models.py deleted file mode 100644 index 3ff7313..0000000 --- a/src/scouter_app/shared/domain_models.py +++ /dev/null @@ -1,58 +0,0 @@ -from fastapi import UploadFile -from pydantic import BaseModel, Field, model_validator - - -class DocumentIngestRequest(BaseModel): - file: UploadFile | None = Field( - None, - description="PDF file to ingest (mutually exclusive with text)", - ) - text: str | None = Field( - None, - description="Raw text content to ingest (mutually exclusive with file)", - ) - metadata: dict = Field( - default_factory=dict, - description="Additional metadata for the document", - ) - - @model_validator(mode="after") - def validate_input(self): - if self.file is not None and self.text is not None: - msg = "Exactly one of 'file' or 'text' must be provided, not both" - raise ValueError( - msg, - ) - if self.file is None and self.text is None: - msg = "Exactly one of 'file' or 'text' must be provided" - raise ValueError(msg) - if self.file is not None: - filename = self.file.filename - if not filename or not 
filename.lower().endswith(".pdf"): - msg = "Only PDF files are supported" - raise ValueError(msg) - return self - - -class SearchRequest(BaseModel): - query: str = Field(..., description="The search query string") - limit: int = Field(default=10, description="Maximum number of results to return") - - -class SearchResult(BaseModel): - content: str = Field(..., description="Retrieved content snippet") - score: float = Field(..., description="Relevance score of the result") - node_id: str = Field(..., description="ID of the graph node") - - -class VectorSearchResult(BaseModel): - node_id: str = Field(description="Unique node identifier") - score: float = Field(description="Similarity score") - content: str = Field(description="Retrieved content") - metadata: dict | None = Field(default=None, description="Additional metadata") - - -class IngestResponse(BaseModel): - task_id: str = Field(..., description="Celery task ID for tracking ingestion") - status: str = Field(..., description="Status of the ingestion request") - env: str = Field(..., description="Current environment") From b7c5ef5d421bdc32935ed501c0223a88fe429d9f Mon Sep 17 00:00:00 2001 From: farhoud Date: Mon, 8 Dec 2025 20:00:58 +0330 Subject: [PATCH 2/8] Fix type ignore comments in chatbot example --- README.md | 40 +++++------ app_main.py | 6 +- evals/conftest.py | 2 +- evals/test_retrieval_relevancy.py | 1 + examples/chatbot/chatbot.py | 12 ++-- pyproject.toml | 3 + src/scouter/__init__.py | 0 src/scouter/agent/__init__.py | 0 src/scouter/agent/agent.py | 69 ++++++++++++++++++ src/scouter/agent/mcp.py | 27 +++++++ src/scouter/agent/tools.py | 53 ++++++++++++++ src/scouter/config/__init__.py | 0 src/scouter/config/llm.py | 107 ++++++++++++++++++++++++++++ src/scouter/ingestion/__init__.py | 0 src/scouter/ingestion/api.py | 59 +++++++++++++++ src/scouter/ingestion/service.py | 67 +++++++++++++++++ src/scouter/ingestion/tasks.py | 51 +++++++++++++ src/scouter/llm/__init__.py | 0 src/scouter/llm/agent.py | 45 ++++++++++++ src/scouter/llm/client.py | 55 ++++++++++++++ src/scouter/llm/tools.py | 13 ++++ src/scouter/llm/utils.py | 92 ++++++++++++++++++++++++ src/scouter/shared/__init__.py | 0 src/scouter/shared/domain_models.py | 58 +++++++++++++++ 24 files changed, 727 insertions(+), 33 deletions(-) create mode 100644 src/scouter/__init__.py create mode 100644 src/scouter/agent/__init__.py create mode 100644 src/scouter/agent/agent.py create mode 100644 src/scouter/agent/mcp.py create mode 100644 src/scouter/agent/tools.py create mode 100644 src/scouter/config/__init__.py create mode 100644 src/scouter/config/llm.py create mode 100644 src/scouter/ingestion/__init__.py create mode 100644 src/scouter/ingestion/api.py create mode 100644 src/scouter/ingestion/service.py create mode 100644 src/scouter/ingestion/tasks.py create mode 100644 src/scouter/llm/__init__.py create mode 100644 src/scouter/llm/agent.py create mode 100644 src/scouter/llm/client.py create mode 100644 src/scouter/llm/tools.py create mode 100644 src/scouter/llm/utils.py create mode 100644 src/scouter/shared/__init__.py create mode 100644 src/scouter/shared/domain_models.py diff --git a/README.md b/README.md index 2fb527e..fb21aba 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,11 @@ Rapid assessment and retrieval from knowledge graph using Neo4j GraphRAG. 
## Overview -Scouter is a knowledge graph-based document retrieval system that: +Scouter is a knowledge graph-based document retrieval system focused on MCP (Model Context Protocol) for agentic search: - Ingests PDFs and text documents using Neo4j GraphRAG's SimpleKGPipeline -- Provides fast semantic search with relevance scoring -- Supports both API and MCP (Model Context Protocol) interfaces +- Provides agentic semantic search via MCP for LLM integration +- Includes REST API for document ingestion - Includes evaluation framework for retrieval quality assessment ## Quick Start @@ -62,31 +62,25 @@ curl -X POST "http://localhost:8000/v1/ingest" \ -d '{"text": "Your document content", "metadata": {"source": "api"}}' ``` -### Search - -```bash -# Search documents -curl "http://localhost:8000/v1/search?query=your%20search%20term&limit=5" -``` - ### Interactive API Visit for interactive API documentation. +**Note:** Search functionality is provided via MCP (Model Context Protocol) for agentic retrieval. Direct REST search API is not available. + ## Architecture ### Components - **Ingestion Service**: Processes PDFs/text into knowledge graph using SimpleKGPipeline -- **Search Service**: Performs semantic search with relevance scoring -- **MCP Server**: Provides Model Context Protocol interface for LLM integration +- **MCP Server**: Core component providing agentic search via Model Context Protocol for LLM integration - **Celery Workers**: Handle async document processing - **Redis**: Task queue and caching ### Data Flow 1. Documents → Ingestion API → Celery Queue → Neo4j GraphRAG -2. Search Query → Search API → Neo4j → Ranked Results +2. Search Query → MCP Server → Agentic Search → Neo4j → Ranked Results ## Development @@ -143,24 +137,26 @@ The project uses Neo4j with APOC plugin for enhanced graph procedures. Docker se ## Examples -### RAG Chatbot +### MCP Integration (Primary Use Case) ```bash -cd examples/chatbot -python chatbot.py +# Start MCP server +python -m scouter_app.agent.mcp + +# Use with Claude Desktop or other MCP-compatible tools ``` -Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation. +Scouter's MCP server enables agentic search for LLMs, providing semantic retrieval from the knowledge graph. -### MCP Integration +### RAG Chatbot ```bash -# Start MCP server -python -m scouter_app.agent.mcp - -# Use with Claude Desktop or other MCP-compatible tools +cd examples/chatbot +python chatbot.py ``` +Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation. 
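+
+A minimal sketch of driving the MCP server from your own Python code, using the same `mcp` client primitives as the chatbot example (the server command mirrors the MCP section above; adjust the module path if your install differs):
+
+```python
+import asyncio
+
+from mcp import ClientSession
+from mcp.client.stdio import StdioServerParameters, stdio_client
+
+
+async def main() -> None:
+    # Spawn the Scouter MCP server as a stdio subprocess
+    params = StdioServerParameters(
+        command="python", args=["-m", "scouter_app.agent.mcp"]
+    )
+    async with stdio_client(params) as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+            # Call the search tool the server exposes
+            result = await session.call_tool(
+                "search_knowledge_graph", {"query": "What is Scouter?"}
+            )
+            print(result)
+
+
+asyncio.run(main())
+```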
+ ## Project Structure ``` diff --git a/app_main.py b/app_main.py index 4ca641e..b5e3713 100644 --- a/app_main.py +++ b/app_main.py @@ -4,9 +4,9 @@ from fastapi import FastAPI -from src.scouter_app.agent.mcp import app as mcp_app -from src.scouter_app.config.llm import get_client_config -from src.scouter_app.ingestion.api import router as ingestion_router +from src.scouter.agent.mcp import app as mcp_app +from src.scouter.config.llm import get_client_config +from src.scouter.ingestion.api import router as ingestion_router logger = logging.getLogger(__name__) diff --git a/evals/conftest.py b/evals/conftest.py index 671946a..1b18da7 100644 --- a/evals/conftest.py +++ b/evals/conftest.py @@ -10,7 +10,7 @@ import pytest -from scouter_app.ingestion.service import IngestionService +from scouter.ingestion.service import IngestionService from .utils import create_light_subset diff --git a/evals/test_retrieval_relevancy.py b/evals/test_retrieval_relevancy.py index a8dca75..a82ed91 100644 --- a/evals/test_retrieval_relevancy.py +++ b/evals/test_retrieval_relevancy.py @@ -3,6 +3,7 @@ from deepeval.test_case import LLMTestCase from examples.chatbot.chatbot import chat_with_rag + from .utils import OpenRouterLLM THRESHOLD = 0.5 diff --git a/examples/chatbot/chatbot.py b/examples/chatbot/chatbot.py index 787161b..6768628 100644 --- a/examples/chatbot/chatbot.py +++ b/examples/chatbot/chatbot.py @@ -6,7 +6,7 @@ from mcp import ClientSession from mcp.client.stdio import StdioServerParameters, stdio_client -from scouter_app.config.llm import ( +from scouter.config.llm import ( DEFAULT_MODEL, call_with_rate_limit, get_chatbot_client, @@ -32,8 +32,6 @@ async def chat_with_rag(query: str) -> str: mcp_tools = await session.list_tools() - print(mcp_tools) - # Convert MCP tools to OpenAI format openai_tools = [ { @@ -72,15 +70,15 @@ async def chat_with_rag(query: str) -> str: tool_args = json.loads(tool_call.function.arguments) result = await session.call_tool(tool_name, tool_args) # Add to messages - messages.append( # type: ignore - {"role": "assistant", "content": "", "tool_calls": [tool_call]} # type: ignore + messages.append( # type: ignore[PGH003] + {"role": "assistant", "content": "", "tool_calls": [tool_call]} # type: ignore[PGH003] ) - messages.append( # type: ignore + messages.append( # type: ignore[PGH003] { "role": "tool", "content": str(result), "tool_call_id": tool_call.id, - } # type: ignore + } # type: ignore[PGH003] ) # Call LLM again with updated messages diff --git a/pyproject.toml b/pyproject.toml index f464e9a..0ab3e42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,9 @@ asyncio_mode = "auto" [tool.ruff] extend = "ruff.toml" +[tool.hatch.build.targets.wheel] +packages = ["src/scouter"] + [dependency-groups] dev = [ "pre-commit>=4.5.0", diff --git a/src/scouter/__init__.py b/src/scouter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/agent/__init__.py b/src/scouter/agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/agent/agent.py b/src/scouter/agent/agent.py new file mode 100644 index 0000000..d3e1861 --- /dev/null +++ b/src/scouter/agent/agent.py @@ -0,0 +1,69 @@ +import json + +from scouter.agent.tools import get_tools +from scouter.config.llm import ( + DEFAULT_MODEL, + call_with_rate_limit, + get_scouter_client, +) + + +def handle_tool_calls(response, tools, client, messages): + if not response.choices[0].message.tool_calls: + return response.choices[0].message.content, messages + + # Append assistant 
message with tool calls + messages.append(response.choices[0].message) + + for tool_call in response.choices[0].message.tool_calls: + tool_name = tool_call.function.name + args = json.loads(tool_call.function.arguments) + for tool in tools: + if tool["function"]["name"] == tool_name: + result = tool["callable"](**args) + # Append tool result message + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "content": json.dumps([r.model_dump() for r in result]) + if isinstance(result, list) + else json.dumps(result), + } + ) + break + + # Follow-up call for multi-turn + follow_up = call_with_rate_limit( + client, + model=DEFAULT_MODEL, + messages=messages, # type: ignore[arg-type] + tools=[t["function"] for t in tools], + tool_choice="auto", + ) + return handle_tool_calls(follow_up, tools, client, messages) + + +def search_agent(query: str, hints: str = ""): + client = get_scouter_client() + tools = get_tools() + openai_tools = [t["function"] for t in tools] + system_content = ( + "You are a search agent. Use the available tools to answer the user's query." + ) + if hints: + system_content += f"\n\nHints:\n{hints}" + messages = [ + {"role": "system", "content": system_content}, + {"role": "user", "content": query}, + ] + response = call_with_rate_limit( + client, + model=DEFAULT_MODEL, + messages=messages, # type: ignore[arg-type] + tools=openai_tools, + tool_choice="auto", + max_tokens=200, + ) + final_response, _ = handle_tool_calls(response, tools, client, messages) + return final_response diff --git a/src/scouter/agent/mcp.py b/src/scouter/agent/mcp.py new file mode 100644 index 0000000..b499682 --- /dev/null +++ b/src/scouter/agent/mcp.py @@ -0,0 +1,27 @@ +from fastmcp import FastMCP + +from .agent import search_agent + +app = FastMCP("Scouter Agent") + + +@app.tool() +def search_knowledge_graph(query: str, hints: str = "") -> str: + """Search the knowledge graph for information related to the query using semantic search. + + This tool allows LLMs to retrieve relevant documents and knowledge from the Scouter knowledge graph. + It performs vector-based semantic search, returning the most relevant results. 
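+
+    Example (illustrative values): search_knowledge_graph("vector indexes in Neo4j", hints="prefer chunk-level matches")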
+ + Args: + query: The search query string to find relevant information + hints: Optional hints to guide the search agent + + Returns: + A response string containing search results and analysis + + """ + return search_agent(query, hints) + + +if __name__ == "__main__": + app.run() diff --git a/src/scouter/agent/tools.py b/src/scouter/agent/tools.py new file mode 100644 index 0000000..f73b8fd --- /dev/null +++ b/src/scouter/agent/tools.py @@ -0,0 +1,53 @@ +import ast + +from neo4j_graphrag.retrievers import VectorRetriever +from pydantic import BaseModel, Field + +from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder +from scouter.shared.domain_models import VectorSearchResult + + +class SemanticSearchParams(BaseModel): + query_text: str = Field(description="exact user query") + top_k: int = Field(default=10, description="Number of results to return (1-20)") + filters: dict | None = Field(default=None, description="Optional filters") + effective_search_ratio: float = Field( + default=1.0, description="Search pool multiplier for better accuracy" + ) + + +def _get_vector_search_tool(): + retriever = VectorRetriever( + driver=get_neo4j_driver(), + index_name="chunkEmbedding", + embedder=get_neo4j_embedder(), + ) + + def semantic_search(**kwargs): + params = SemanticSearchParams(**kwargs) + raw_results = retriever.search(**params.model_dump()) + items = raw_results.items + return [ + VectorSearchResult( + node_id=data.get("id", "unknown"), + score=data.get("score", 0.0), + content=data.get("text", ""), + metadata=data, + ) + for result in items + for data in [ast.literal_eval(result.content)] + ] + + return { + "type": "function", + "function": { + "name": "semantic_search", + "description": "find relative information based on cosine similarity", + "parameters": SemanticSearchParams.model_json_schema(), + }, + "callable": semantic_search, + } + + +def get_tools(): + return [_get_vector_search_tool()] diff --git a/src/scouter/config/__init__.py b/src/scouter/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/config/llm.py b/src/scouter/config/llm.py new file mode 100644 index 0000000..ca0d95f --- /dev/null +++ b/src/scouter/config/llm.py @@ -0,0 +1,107 @@ +import os +import time +from functools import lru_cache + +import openai +from neo4j_graphrag.embeddings import SentenceTransformerEmbeddings +from neo4j_graphrag.llm import OpenAILLM +from pydantic import model_validator +from pydantic_settings import BaseSettings + +import neo4j +from neo4j import GraphDatabase + +DEFAULT_MODEL = "openai/gpt-oss-20b:free" + + +class ClientConfig(BaseSettings): + provider: str = "openai" + api_key: str | None = None + model: str = DEFAULT_MODEL + api_base: str | None = None + temperature: float = 0.7 + max_tokens: int | None = None + env: str = "test" + + @model_validator(mode="after") + def set_provider_defaults(self): + if self.provider == "openrouter": + self.api_base = self.api_base or "https://openrouter.ai/api/v1" + self.api_key = self.api_key or os.getenv("OPENROUTER_API_KEY") + else: + self.api_key = self.api_key or os.getenv("OPENAI_API_KEY") + if not self.api_key: + msg = f"API key required for provider {self.provider}" + raise ValueError(msg) + if self.env not in ["development", "production", "test"]: + msg = "env must be one of: development, production, test" + raise ValueError(msg) + return self + + def __init__(self, **data): + super().__init__(**data) + if self.provider == "openrouter": + self.api_base = self.api_base or 
"https://openrouter.ai/api/v1" + self.api_key = os.getenv("OPENROUTER_API_KEY", self.api_key) + # Map model if needed, e.g., self.model = "openai/gpt-3.5-turbo" + self.env = os.getenv("SCOUTER_ENV", self.env) + + +def get_client_config(provider: str = "openai") -> ClientConfig: + return ClientConfig(provider=provider) + + +def create_client(config: ClientConfig) -> openai.OpenAI: + return openai.OpenAI( + api_key=config.api_key, + base_url=config.api_base, + max_retries=0, # Disable built-in retries to let our wrapper handle rate limits + ) + + +def get_chatbot_client() -> openai.OpenAI: + config = get_client_config("openrouter") + config.temperature = 0.9 # More creative for chatbot + return create_client(config) + + +def get_scouter_client() -> openai.OpenAI: + config = get_client_config("openrouter") + config.temperature = 0.0 # Deterministic for retrieval + return create_client(config) + + +@lru_cache(maxsize=1) +def get_neo4j_driver() -> neo4j.Driver: + uri = os.getenv("NEO4J_URI", "bolt://localhost:7687") + user = os.getenv("NEO4J_USER", "neo4j") + password = os.getenv("NEO4J_PASSWORD", "password") + return GraphDatabase.driver(uri, auth=(user, password)) + + +@lru_cache(maxsize=1) +def get_neo4j_llm() -> OpenAILLM: + config = get_client_config("openrouter") + return OpenAILLM(config.model, api_key=config.api_key, base_url=config.api_base) + + +@lru_cache(maxsize=1) +def get_neo4j_embedder() -> SentenceTransformerEmbeddings: + return SentenceTransformerEmbeddings("Qwen/Qwen3-Embedding-0.6B") + + +def call_with_rate_limit(client: openai.OpenAI, **kwargs): + """Call OpenAI client with rate limit handling.""" + max_retries = 5 + for attempt in range(max_retries): + try: + return client.chat.completions.create(**kwargs) + except openai.RateLimitError: # noqa: PERF203 + if attempt < max_retries - 1: + wait_time = 2**attempt # Exponential backoff + time.sleep(wait_time) + else: + raise + except Exception: + raise + return None # Unreachable diff --git a/src/scouter/ingestion/__init__.py b/src/scouter/ingestion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/ingestion/api.py b/src/scouter/ingestion/api.py new file mode 100644 index 0000000..72c3118 --- /dev/null +++ b/src/scouter/ingestion/api.py @@ -0,0 +1,59 @@ +"""API endpoints for document ingestion.""" + +import json +import tempfile + +from fastapi import APIRouter, Form, UploadFile + +from scouter.config.llm import get_client_config +from scouter.ingestion.tasks import process_document_task +from scouter.shared.domain_models import IngestResponse + +router = APIRouter() + + +@router.post("/v1/ingest", response_model=IngestResponse, status_code=202) +async def ingest_document( + file: UploadFile | None = None, + text: str | None = Form(None), + metadata: str = Form("{}"), +) -> IngestResponse: + """Ingest a PDF file or raw text into the knowledge graph asynchronously. + + Provide either 'file' (PDF) or 'text', not both. + + Args: + file: PDF file to ingest. + text: Raw text content to ingest. + metadata: JSON string containing metadata. + + Returns: + IngestResponse with task ID and status. + + Raises: + ValueError: If input validation fails. 
+ """ + # Parse metadata + try: + metadata_dict = json.loads(metadata) + except json.JSONDecodeError: + metadata_dict = {} + + # Validate input + if (file is None and text is None) or (file is not None and text is not None): + msg = "Exactly one of 'file' or 'text' must be provided" + raise ValueError(msg) + + task_data = {"metadata": metadata_dict} + + if file is not None: + # Save file to temp location + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(await file.read()) + task_data["file_path"] = temp_file.name + else: + task_data["text"] = text + + config = get_client_config() + task = process_document_task.apply_async(args=[task_data]) + return IngestResponse(task_id=task.id, status="accepted", env=config.env) diff --git a/src/scouter/ingestion/service.py b/src/scouter/ingestion/service.py new file mode 100644 index 0000000..ef3c475 --- /dev/null +++ b/src/scouter/ingestion/service.py @@ -0,0 +1,67 @@ +"""Service for ingesting documents into the knowledge graph.""" + +from typing import Any + +from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline + +from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm + + +class IngestionService: + """Service for ingesting documents into Neo4j knowledge graph.""" + + def __init__(self) -> None: + """Initialize the ingestion service with Neo4j connection.""" + self.driver = get_neo4j_driver() + self.llm = get_neo4j_llm() + self.embedder = get_neo4j_embedder() + + async def process_document( + self, + file_path: str | None = None, + text: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Process a PDF file or text into the knowledge graph using SimpleKGPipeline. + + Args: + file_path: Path to PDF file to process. + text: Text content to process. + metadata: Additional metadata for the document. + + Returns: + Dictionary containing processing status and type. + + Raises: + ValueError: If neither file_path nor text is provided. + """ + if metadata is None: + metadata = {} + + if file_path is None and text is None: + msg = "Either file_path or text must be provided" + raise ValueError(msg) + + try: + from_pdf = file_path is not None + kg_builder = SimpleKGPipeline( + llm=self.llm, + driver=self.driver, + embedder=self.embedder, + from_pdf=from_pdf, + ) + if from_pdf: + await kg_builder.run_async( + file_path=file_path, + document_metadata=metadata, + ) + else: + await kg_builder.run_async(text=text, document_metadata=metadata) + except OSError as e: + return {"status": "failed", "error": str(e)} + else: + return {"status": "processed", "type": "pdf" if from_pdf else "text"} + + def close(self) -> None: + """Close the Neo4j driver connection.""" + self.driver.close() diff --git a/src/scouter/ingestion/tasks.py b/src/scouter/ingestion/tasks.py new file mode 100644 index 0000000..21ba09d --- /dev/null +++ b/src/scouter/ingestion/tasks.py @@ -0,0 +1,51 @@ +"""Celery tasks for asynchronous document processing.""" + +import asyncio +import concurrent.futures +import os +from typing import Any + +from celery import Celery + +from scouter.ingestion.service import IngestionService + +app = Celery( + "scouter.ingestion.tasks", + broker=os.getenv("REDIS_URL", "redis://localhost:6379/0"), +) + + +@app.task +def process_document_task(task_data: dict[str, Any]) -> dict[str, Any]: + """Long-running task to process PDF or text into the knowledge graph. + + Args: + task_data: Dictionary containing file_path, text, and metadata. 
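+            Illustrative shapes: {"file_path": "/tmp/doc.pdf", "metadata": {"source": "api"}}
+            for PDFs, or {"text": "Raw text", "metadata": {}} for plain text.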
+ + Returns: + Dictionary with processing result. + """ + + async def run_async() -> dict[str, Any]: + service = IngestionService() + try: + file_path = task_data.get("file_path") + text = task_data.get("text") + metadata = task_data.get("metadata", {}) + return await service.process_document( + file_path=file_path, + text=text, + metadata=metadata, + ) + finally: + service.close() + + # Run the async function in the event loop + loop = asyncio.get_event_loop() + if loop.is_running(): + # If there's already a running loop, use ThreadPoolExecutor + with concurrent.futures.ThreadPoolExecutor() as executor: + future = executor.submit(asyncio.run, run_async()) + return future.result() + else: + return asyncio.run(run_async()) diff --git a/src/scouter/llm/__init__.py b/src/scouter/llm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/llm/agent.py b/src/scouter/llm/agent.py new file mode 100644 index 0000000..a07451c --- /dev/null +++ b/src/scouter/llm/agent.py @@ -0,0 +1,45 @@ +import json +from collections.abc import Iterable + +from openai.types.chat import ( + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionToolUnionParam, +) + +from .client import call_llm +from .tools import run_tool + + +def create_agent( + model: str = "gpt-5.1-mini", + tools: Iterable[ChatCompletionToolUnionParam] | None = None, +): + """ + Returns a function that acts as a functional agent with its own tools and model. + """ + + def agent( + messages: list[ChatCompletionMessageParam], max_steps: int = 5 + ) -> ChatCompletionMessage: + steps = 0 + while steps < max_steps: + msg: ChatCompletionMessage = call_llm(model, messages, tools) + messages.append(msg) + + # Handle tool calls + if getattr(msg, "tool_calls", None): + for tc in msg.tool_calls: + args = json.loads(tc.function.arguments) + output = run_tool(tc.function.name, args) + messages.append( + ChatCompletionMessage( + role="tool", content=output, tool_call_id=tc.id + ) + ) + else: + break + steps += 1 + return messages[-1] + + return agent diff --git a/src/scouter/llm/client.py b/src/scouter/llm/client.py new file mode 100644 index 0000000..01821cc --- /dev/null +++ b/src/scouter/llm/client.py @@ -0,0 +1,55 @@ +import os +from collections.abc import Iterable +from dataclasses import dataclass + +from openai import OpenAI +from openai.types.chat import ( + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionToolUnionParam, +) + +from .utils import retry_loop + + +@dataclass(slots=True) +class LLMConfig: + api_key: str | None = None + base_url: str | None = None + timeout: int = 30 + max_retries: int = 3 + + @staticmethod + def load_from_env() -> "LLMConfig": + return LLMConfig( + api_key=os.getenv("OPENAI_API_KEY"), + base_url=os.getenv("OPENAI_BASE_URL"), + ) + + +def create_llm_client(cfg: LLMConfig | None = None) -> OpenAI: + cfg = cfg or LLMConfig.load_from_env() + + return OpenAI( + api_key=cfg.api_key, + base_url=cfg.base_url, + timeout=cfg.timeout, + max_retries=cfg.max_retries, + ) + + +client = create_llm_client() + + +def call_llm( + model: str, + messages: list[ChatCompletionMessageParam], + tools: Iterable[ChatCompletionToolUnionParam] | None = None, +) -> ChatCompletionMessage: + def _call(): + return client.chat.completions.create( + model=model, messages=messages, tools=tools or [] + ) + + res = retry_loop(_call) + return res.choices[0].message diff --git a/src/scouter/llm/tools.py b/src/scouter/llm/tools.py new file mode 100644 index 0000000..bb9fe73 --- /dev/null +++ 
b/src/scouter/llm/tools.py @@ -0,0 +1,13 @@ +from collections.abc import Callable + +tool_registry: dict[str, dict] = {} + + +def register_tool(name: str, fn: Callable, description: str, schema: dict): + tool_registry[name] = {"fn": fn, "description": description, "schema": schema} + + +def run_tool(name: str, args: dict) -> str: + if name not in tool_registry: + return f"Unknown tool: {name}" + return tool_registry[name]["fn"](args) diff --git a/src/scouter/llm/utils.py b/src/scouter/llm/utils.py new file mode 100644 index 0000000..b929e88 --- /dev/null +++ b/src/scouter/llm/utils.py @@ -0,0 +1,92 @@ +import random +import time +from typing import cast + +from openai import APIError, RateLimitError +from openai.types.chat import ( + ChatCompletionAssistantMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +) + +ERROR_MAX_RETRY = "max retries exceeded" + + +def retry_loop(func, max_retries=5, base_delay=1): + last_exception: BaseException | None = None + + for attempt in range(max_retries): + try: + return func() + except (RateLimitError, APIError) as e: # noqa: PERF203 + last_exception = e + if attempt == max_retries - 1: + break + + sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 + time.sleep(sleep_time) + + # If we reach here, all retries failed + raise last_exception or RuntimeError(ERROR_MAX_RETRY) + + +def as_param(msg: ChatCompletionMessage) -> ChatCompletionMessageParam: + role = msg.role + + # ---------------- USER ---------------- + if role == "user": + return cast( + "ChatCompletionUserMessageParam", + { + "role": "user", + "content": msg.content or "", + }, + ) + + # ---------------- SYSTEM ---------------- + if role == "system": + return cast( + "ChatCompletionSystemMessageParam", + { + "role": "system", + "content": msg.content or "", + }, + ) + + # ---------------- TOOL ---------------- + if role == "tool": + # Narrow type: Pyright understands msg.tool_call_id here + return cast( + "ChatCompletionToolMessageParam", + { + "role": "tool", + "content": msg.content or "", + "tool_call_id": msg.tool_call_id, # valid only for tool messages + }, + ) + + # ---------------- ASSISTANT ---------------- + assistant: ChatCompletionAssistantMessageParam = { + "role": "assistant", + } + + if msg.content is not None: + assistant["content"] = msg.content + + if msg.tool_calls: + assistant["tool_calls"] = [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + for tc in msg.tool_calls + ] + + return assistant diff --git a/src/scouter/shared/__init__.py b/src/scouter/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scouter/shared/domain_models.py b/src/scouter/shared/domain_models.py new file mode 100644 index 0000000..3ff7313 --- /dev/null +++ b/src/scouter/shared/domain_models.py @@ -0,0 +1,58 @@ +from fastapi import UploadFile +from pydantic import BaseModel, Field, model_validator + + +class DocumentIngestRequest(BaseModel): + file: UploadFile | None = Field( + None, + description="PDF file to ingest (mutually exclusive with text)", + ) + text: str | None = Field( + None, + description="Raw text content to ingest (mutually exclusive with file)", + ) + metadata: dict = Field( + default_factory=dict, + description="Additional metadata for the document", + ) + + @model_validator(mode="after") + def validate_input(self): + if self.file 
is not None and self.text is not None: + msg = "Exactly one of 'file' or 'text' must be provided, not both" + raise ValueError( + msg, + ) + if self.file is None and self.text is None: + msg = "Exactly one of 'file' or 'text' must be provided" + raise ValueError(msg) + if self.file is not None: + filename = self.file.filename + if not filename or not filename.lower().endswith(".pdf"): + msg = "Only PDF files are supported" + raise ValueError(msg) + return self + + +class SearchRequest(BaseModel): + query: str = Field(..., description="The search query string") + limit: int = Field(default=10, description="Maximum number of results to return") + + +class SearchResult(BaseModel): + content: str = Field(..., description="Retrieved content snippet") + score: float = Field(..., description="Relevance score of the result") + node_id: str = Field(..., description="ID of the graph node") + + +class VectorSearchResult(BaseModel): + node_id: str = Field(description="Unique node identifier") + score: float = Field(description="Similarity score") + content: str = Field(description="Retrieved content") + metadata: dict | None = Field(default=None, description="Additional metadata") + + +class IngestResponse(BaseModel): + task_id: str = Field(..., description="Celery task ID for tracking ingestion") + status: str = Field(..., description="Status of the ingestion request") + env: str = Field(..., description="Current environment") From 2c8893b71386ccb38d620cf07fffec5ffbc8044d Mon Sep 17 00:00:00 2001 From: farhoud Date: Tue, 9 Dec 2025 18:28:44 +0330 Subject: [PATCH 3/8] feat: support str return type in tools - Update Tool handler type to support BaseModel | str returns - Modify validation and execution to handle str outputs - Add run_tool function for name-based tool execution - Update docstrings and type hints accordingly --- src/scouter/llm/tools.py | 188 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 180 insertions(+), 8 deletions(-) diff --git a/src/scouter/llm/tools.py b/src/scouter/llm/tools.py index bb9fe73..f592fed 100644 --- a/src/scouter/llm/tools.py +++ b/src/scouter/llm/tools.py @@ -1,13 +1,185 @@ -from collections.abc import Callable +from __future__ import annotations -tool_registry: dict[str, dict] = {} +import asyncio +import inspect +import json +from collections.abc import Callable # noqa: TC003 +from typing import TYPE_CHECKING, Any, get_origin +from pydantic import BaseModel, Field -def register_tool(name: str, fn: Callable, description: str, schema: dict): - tool_registry[name] = {"fn": fn, "description": description, "schema": schema} +from .exceptions import ToolExecutionError +if TYPE_CHECKING: + from .types import ChatCompletionToolParam -def run_tool(name: str, args: dict) -> str: - if name not in tool_registry: - return f"Unknown tool: {name}" - return tool_registry[name]["fn"](args) + +class Tool(BaseModel): + name: str + description: str + handler: Callable[[BaseModel], BaseModel | str] + + # Auto-filled fields + parameters_schema: dict = Field(default_factory=dict) + output_schema: dict = Field(default_factory=dict) + description_with_output: str = "" + + # Internal: Store the actual class types for runtime conversion + input_type: type[BaseModel] | None = None + + def model_post_init(self, /, __context) -> None: + # 1. Extract input model from handler signature + sig = inspect.signature(self.handler) + if not sig.parameters: + msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)." 
+ raise TypeError(msg) + + param = next(iter(sig.parameters.values())) + input_model = param.annotation + + origin = get_origin(input_model) or input_model + if not (isinstance(origin, type) and issubclass(origin, BaseModel)): + msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}" + raise TypeError(msg) + + self.input_type = origin # SAVE THIS for execute_tool + + # 2. Extract return type + return_type = sig.return_annotation + return_origin = get_origin(return_type) or return_type + if return_origin is str: + pass # Allow str + elif isinstance(return_origin, type) and issubclass(return_origin, BaseModel): + pass # Allow BaseModel + else: + msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str" + raise TypeError(msg) + + # 3. Auto-fill everything + self.parameters_schema = origin.model_json_schema() + if return_origin is str: + self.output_schema = {"type": "string"} + else: + self.output_schema = return_origin.model_json_schema() # type: ignore[reportAttributeAccessIssue] + + # 4. Enrich description with pretty-printed output schema + if return_origin is str: + self.description_with_output = ( + f"{self.description}\n\nThe tool will **always return a string**." + ) + else: + pretty_output = json.dumps(self.output_schema, indent=2) + self.description_with_output = ( + f"{self.description}\n\n" + f"The tool will **always return JSON matching this exact schema**:\n" + f"```json\n{pretty_output}\n```" + ) + + def openai_tool_spec(self) -> ChatCompletionToolParam: + return { + "type": "function", + "function": { + "name": self.name, + "description": self.description_with_output, + "parameters": self.parameters_schema, + }, + } + + +def create_tool( + name: str, description: str, handler: Callable[[BaseModel], BaseModel | str] +) -> Tool: + """ + Creates a Pydantic Tool instance. + """ + return Tool(name=name, description=description, handler=handler) + + +def tool(name: str | None = None, description: str | None = None): + """ + Decorator to create and register a Pydantic-based tool. + The decorated function MUST take a Pydantic model and return a Pydantic model or a string. + """ + + def decorator(func: Callable[[BaseModel], BaseModel | str]): + tool_name = name or func.__name__ + tool_desc = description or (func.__doc__ or "No description.").strip() + + # Create the Tool instance + t = create_tool(tool_name, tool_desc, func) + + # Register it + register_tool(t) + + return func + + return decorator + + +def run_tool(name: str, raw_args: dict[str, Any]) -> str: + """ + Looks up a tool by name and executes it. + """ + tool_instance = lookup_tool(name) + return execute_tool(tool_instance, raw_args) + + +def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str: + """ + Executes a Pydantic Tool. + 1. Converts raw_args (dict) -> InputModel (Pydantic). + 2. Calls handler(InputModel). + 3. Gets OutputModel or str. + 4. Returns OutputModel.model_dump_json() or the str. + """ + try: + # 1. Instantiate the specific input model + input_model_cls = tool_instance.input_type + assert input_model_cls is not None + input_obj = input_model_cls(**raw_args) + + # 2. Call Handler + handler = tool_instance.handler + + if inspect.iscoroutinefunction(handler): + result_model = asyncio.run(handler(input_obj)) + else: + result_model = handler(input_obj) + + # 3. Validate Return + if not isinstance(result_model, (BaseModel, str)): + msg = f"Tool '{tool_instance.name}' handler did not return a Pydantic model or str." 
+ raise ToolExecutionError(msg) # noqa: TRY301 + + # 4. Serialize Output + if isinstance(result_model, str): + return result_model + return result_model.model_dump_json() + + except Exception as e: + msg = f"Error executing tool '{tool_instance.name}': {e!s}" + raise ToolExecutionError(msg) from e + + +# Global registry stores Tool instances +TOOL_REGISTRY: dict[str, Tool] = {} + + +def register_tool(tool_instance: Tool) -> None: + """ + Registers a Tool object in the global registry. + """ + if not tool_instance.name: + msg = "Cannot register tool without a name." + raise ToolExecutionError(msg) + TOOL_REGISTRY[tool_instance.name] = tool_instance + + +def lookup_tool(name: str) -> Tool: + """ + Retrieves a Tool object from the global registry. + """ + if name not in TOOL_REGISTRY: + msg = f"Tool '{name}' not found in registry." + raise ToolExecutionError(msg) + return TOOL_REGISTRY[name] From ed25e593d4945e6951d4b9538500e4bff2862daf Mon Sep 17 00:00:00 2001 From: farhoud Date: Tue, 9 Dec 2025 18:28:49 +0330 Subject: [PATCH 4/8] fix: update agent to use new tool and fix type issues - Import and use run_tool function for tool execution - Add proper type casts for message handling - Move imports to TYPE_CHECKING blocks for better performance --- src/scouter/llm/agent.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/scouter/llm/agent.py b/src/scouter/llm/agent.py index a07451c..777f97e 100644 --- a/src/scouter/llm/agent.py +++ b/src/scouter/llm/agent.py @@ -1,12 +1,19 @@ import json from collections.abc import Iterable +from typing import TYPE_CHECKING, cast from openai.types.chat import ( ChatCompletionMessage, ChatCompletionMessageParam, + ChatCompletionToolMessageParam, ChatCompletionToolUnionParam, ) +if TYPE_CHECKING: + from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, + ) + from .client import call_llm from .tools import run_tool @@ -25,21 +32,25 @@ def agent( steps = 0 while steps < max_steps: msg: ChatCompletionMessage = call_llm(model, messages, tools) - messages.append(msg) + messages.append(cast("ChatCompletionMessageParam", msg)) # Handle tool calls - if getattr(msg, "tool_calls", None): + if msg.tool_calls: for tc in msg.tool_calls: + tc = cast("ChatCompletionMessageToolCall", tc) args = json.loads(tc.function.arguments) output = run_tool(tc.function.name, args) messages.append( - ChatCompletionMessage( - role="tool", content=output, tool_call_id=tc.id + cast( + "ChatCompletionMessageParam", + ChatCompletionToolMessageParam( + role="tool", content=output, tool_call_id=tc.id + ), ) ) else: break steps += 1 - return messages[-1] + return cast("ChatCompletionMessage", messages[-1]) return agent From 8651f29f005e6753756ed478f103a4e8ac7d77be Mon Sep 17 00:00:00 2001 From: farhoud Date: Tue, 9 Dec 2025 18:28:55 +0330 Subject: [PATCH 5/8] refactor: clean up utils.py - Remove unused imports - Remove unused as_param function --- src/scouter/llm/utils.py | 68 ---------------------------------------- 1 file changed, 68 deletions(-) diff --git a/src/scouter/llm/utils.py b/src/scouter/llm/utils.py index b929e88..8d53808 100644 --- a/src/scouter/llm/utils.py +++ b/src/scouter/llm/utils.py @@ -1,16 +1,7 @@ import random import time -from typing import cast from openai import APIError, RateLimitError -from openai.types.chat import ( - ChatCompletionAssistantMessageParam, - ChatCompletionMessage, - ChatCompletionMessageParam, - ChatCompletionSystemMessageParam, - ChatCompletionToolMessageParam, - 
ChatCompletionUserMessageParam, -) ERROR_MAX_RETRY = "max retries exceeded" @@ -31,62 +22,3 @@ def retry_loop(func, max_retries=5, base_delay=1): # If we reach here, all retries failed raise last_exception or RuntimeError(ERROR_MAX_RETRY) - - -def as_param(msg: ChatCompletionMessage) -> ChatCompletionMessageParam: - role = msg.role - - # ---------------- USER ---------------- - if role == "user": - return cast( - "ChatCompletionUserMessageParam", - { - "role": "user", - "content": msg.content or "", - }, - ) - - # ---------------- SYSTEM ---------------- - if role == "system": - return cast( - "ChatCompletionSystemMessageParam", - { - "role": "system", - "content": msg.content or "", - }, - ) - - # ---------------- TOOL ---------------- - if role == "tool": - # Narrow type: Pyright understands msg.tool_call_id here - return cast( - "ChatCompletionToolMessageParam", - { - "role": "tool", - "content": msg.content or "", - "tool_call_id": msg.tool_call_id, # valid only for tool messages - }, - ) - - # ---------------- ASSISTANT ---------------- - assistant: ChatCompletionAssistantMessageParam = { - "role": "assistant", - } - - if msg.content is not None: - assistant["content"] = msg.content - - if msg.tool_calls: - assistant["tool_calls"] = [ - { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, - }, - } - for tc in msg.tool_calls - ] - - return assistant From 24997ab83501468e66884bb388eed5b437c6638a Mon Sep 17 00:00:00 2001 From: farhoud Date: Tue, 9 Dec 2025 18:29:02 +0330 Subject: [PATCH 6/8] feat: add exceptions and types modules - Add ToolExecutionError exception - Add OpenAI type re-exports for convenience --- src/scouter/llm/exceptions.py | 10 ++++++++++ src/scouter/llm/types.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 src/scouter/llm/exceptions.py create mode 100644 src/scouter/llm/types.py diff --git a/src/scouter/llm/exceptions.py b/src/scouter/llm/exceptions.py new file mode 100644 index 0000000..417742b --- /dev/null +++ b/src/scouter/llm/exceptions.py @@ -0,0 +1,10 @@ +class LLMError(Exception): + """Base exception for LLM related errors.""" + + +class ToolExecutionError(LLMError): + """Raised when a tool fails to execute.""" + + +class AgentError(LLMError): + """Raised when agent operations fail.""" diff --git a/src/scouter/llm/types.py b/src/scouter/llm/types.py new file mode 100644 index 0000000..6b22afb --- /dev/null +++ b/src/scouter/llm/types.py @@ -0,0 +1,26 @@ +# Re-export OpenAI types +from openai.types.chat import ( + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionToolParam, + ChatCompletionUserMessageParam, +) +from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, +) + +__all__ = [ + "ChatCompletion", + "ChatCompletionAssistantMessageParam", + "ChatCompletionMessage", + "ChatCompletionMessageParam", + "ChatCompletionMessageToolCall", + "ChatCompletionSystemMessageParam", + "ChatCompletionToolMessageParam", + "ChatCompletionToolParam", + "ChatCompletionUserMessageParam", +] From 96ba0a5decda30167cf33bcb4eac8a39ba4e5aab Mon Sep 17 00:00:00 2001 From: farhoud Date: Wed, 10 Dec 2025 19:20:33 +0330 Subject: [PATCH 7/8] Fix llmcore agent: correct continue condition, update model name, and resolve lint issues --- src/scouter/llmcore/__init__.py | 53 +++++++++ 
src/scouter/llmcore/agent.py | 164 ++++++++++++++++++++++++++ src/scouter/llmcore/client.py | 86 ++++++++++++++ src/scouter/llmcore/exceptions.py | 10 ++ src/scouter/llmcore/tools.py | 185 ++++++++++++++++++++++++++++++ src/scouter/llmcore/types.py | 26 +++++ src/scouter/llmcore/utils.py | 24 ++++ 7 files changed, 548 insertions(+) create mode 100644 src/scouter/llmcore/__init__.py create mode 100644 src/scouter/llmcore/agent.py create mode 100644 src/scouter/llmcore/client.py create mode 100644 src/scouter/llmcore/exceptions.py create mode 100644 src/scouter/llmcore/tools.py create mode 100644 src/scouter/llmcore/types.py create mode 100644 src/scouter/llmcore/utils.py diff --git a/src/scouter/llmcore/__init__.py b/src/scouter/llmcore/__init__.py new file mode 100644 index 0000000..d190412 --- /dev/null +++ b/src/scouter/llmcore/__init__.py @@ -0,0 +1,53 @@ +from .agent import AgentRun, run_agent +from .client import ChatCompletionOptions, LLMConfig, call_llm, create_llm_client +from .exceptions import AgentError, LLMError, ToolExecutionError +from .tools import ( + Tool, + create_tool, + execute_tool, + lookup_tool, + register_tool, + run_tool, + tool, +) +from .types import ( + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionMessageToolCall, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionToolParam, + ChatCompletionUserMessageParam, +) +from .utils import retry_loop + +__all__ = [ + "AgentError", + "AgentRun", + "ChatCompletion", + "ChatCompletionAssistantMessageParam", + "ChatCompletionMessage", + "ChatCompletionMessageParam", + "ChatCompletionMessageToolCall", + "ChatCompletionOptions", + "ChatCompletionSystemMessageParam", + "ChatCompletionToolMessageParam", + "ChatCompletionToolParam", + "ChatCompletionUserMessageParam", + "LLMConfig", + "LLMError", + "Tool", + "ToolExecutionError", + "call_llm", + "create_llm_client", + "create_tool", + "execute_tool", + "lookup_tool", + "register_tool", + "retry_loop", + "run_agent", + "run_tool", + "tool", +] diff --git a/src/scouter/llmcore/agent.py b/src/scouter/llmcore/agent.py new file mode 100644 index 0000000..246a14e --- /dev/null +++ b/src/scouter/llmcore/agent.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from time import time +from typing import TYPE_CHECKING, cast + +from openai.types.chat import ( + ChatCompletion, + ChatCompletionMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionToolUnionParam, +) + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, + ) + +from .client import ChatCompletionOptions, call_llm +from .tools import run_tool + + +@dataclass +class InputStep: + message: ChatCompletionMessageParam + + +@dataclass +class LLMStep: + completion: ChatCompletion + + @property + def message(self) -> ChatCompletionMessageParam: + return cast("ChatCompletionMessageParam", self.completion.choices[0].message) + + +@dataclass +class ToolStep: + tool_call_id: str + tool_name: str + args: dict + output: str + execution_time: float + success: bool + error_message: str | None + + @property + def message(self) -> ChatCompletionToolMessageParam: + return ChatCompletionToolMessageParam( + role="tool", content=self.output, tool_call_id=self.tool_call_id + ) + + +Step = InputStep | LLMStep | ToolStep + + +@dataclass +class 
AgentRun: + continue_condition: Callable[[AgentRun], bool] = field( + default_factory=lambda: default_continue_condition_factory() + ) + steps: list[Step] = field(default_factory=list) + + def add_step(self, step: Step) -> None: + """Add a step to the run.""" + self.steps.append(step) + + @property + def conversation_history(self) -> list[ChatCompletionMessageParam]: + return [step.message for step in self.steps] + + @property + def total_usage( + self, + ) -> dict: # Simplified, can make proper ChatCompletionUsage later + total = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0} + for step in self.steps: + if isinstance(step, LLMStep) and step.completion.usage: + usage = step.completion.usage + total["completion_tokens"] += usage.completion_tokens or 0 + total["prompt_tokens"] += usage.prompt_tokens or 0 + total["total_tokens"] += usage.total_tokens or 0 + return total + + @property + def last_output(self) -> str: + if not self.steps: + msg = "No steps in run" + raise ValueError(msg) + last_step = self.steps[-1] + if isinstance(last_step, LLMStep): + content = last_step.message.get("content") + return content if isinstance(content, str) else "" + if isinstance(last_step, ToolStep): + return last_step.output + return "" + + @property + def tool_executions(self) -> list[ToolStep]: + return [step for step in self.steps if isinstance(step, ToolStep)] + + +def default_continue_condition_factory( + max_steps: int | None = None, +) -> Callable[[AgentRun], bool]: + def condition(run: AgentRun) -> bool: + if max_steps is not None: + llm_count = sum(1 for step in run.steps if isinstance(step, LLMStep)) + if llm_count >= max_steps: + return False + # Filter out InputStep to find the last meaningful step + non_input_steps = [ + step for step in run.steps if not isinstance(step, InputStep) + ] + if not non_input_steps: + return True # Only InputSteps present, initial state + last_non_input = non_input_steps[-1] + return isinstance(last_non_input, ToolStep) + + return condition + + +def run_agent( + run: AgentRun, + model: str = "gpt-4o-mini", + tools: Iterable[ChatCompletionToolUnionParam] | None = None, + options: ChatCompletionOptions | None = None, +): + while run.continue_condition(run): + completion: ChatCompletion = call_llm( + model, run.conversation_history, tools, options + ) + msg = completion.choices[0].message + run.add_step(LLMStep(completion)) + + # Handle tool calls + if msg.tool_calls: + for tc in msg.tool_calls: + tc = cast("ChatCompletionMessageToolCall", tc) + args = json.loads(tc.function.arguments) + start = time() + try: + output = run_tool(tc.function.name, args) + success = True + error = None + except Exception as e: # noqa: BLE001 + output = "" + success = False + error = str(e) + end = time() + run.add_step( + ToolStep( + tc.id, + tc.function.name, + args, + output, + end - start, + success, + error, + ) + ) diff --git a/src/scouter/llmcore/client.py b/src/scouter/llmcore/client.py new file mode 100644 index 0000000..8944e18 --- /dev/null +++ b/src/scouter/llmcore/client.py @@ -0,0 +1,86 @@ +import os +from collections.abc import Iterable +from dataclasses import dataclass +from typing import TypedDict + +from openai import OpenAI +from openai.types.chat import ( + ChatCompletion, + ChatCompletionMessageParam, + ChatCompletionToolUnionParam, +) + +from .utils import retry_loop + + +class ChatCompletionOptions(TypedDict, total=False): + """Options for ChatCompletion API calls. + + Attributes: + max_tokens: Maximum number of tokens to generate. 
+ temperature: Sampling temperature (0.0 to 2.0). + top_p: Nucleus sampling parameter. + frequency_penalty: Frequency penalty (-2.0 to 2.0). + presence_penalty: Presence penalty (-2.0 to 2.0). + stop: List of stop sequences. + """ + + max_tokens: int + temperature: float + top_p: float + frequency_penalty: float + presence_penalty: float + stop: list[str] + + +@dataclass(slots=True) +class LLMConfig: + api_key: str | None = None + base_url: str | None = None + timeout: int = 30 + max_retries: int = 3 + + @staticmethod + def load_from_env() -> "LLMConfig": + return LLMConfig( + api_key=os.getenv("OPENAI_API_KEY"), + base_url=os.getenv("OPENAI_BASE_URL"), + ) + + +def create_llm_client(cfg: LLMConfig | None = None) -> OpenAI: + cfg = cfg or LLMConfig.load_from_env() + + return OpenAI( + api_key=cfg.api_key, + base_url=cfg.base_url, + timeout=cfg.timeout, + max_retries=cfg.max_retries, + ) + + +client = create_llm_client() + + +def call_llm( + model: str, + messages: list[ChatCompletionMessageParam], + tools: Iterable[ChatCompletionToolUnionParam] | None = None, + options: ChatCompletionOptions | None = None, +) -> ChatCompletion: + """Call the LLM with the given parameters. + + Args: + model: The model to use. + messages: List of messages. + tools: Optional tools. + options: Optional ChatCompletion options like max_tokens, temperature, etc. + """ + + def _call(): + kwargs = options or {} + return client.chat.completions.create( + model=model, messages=messages, tools=tools or [], **kwargs + ) + + return retry_loop(_call) diff --git a/src/scouter/llmcore/exceptions.py b/src/scouter/llmcore/exceptions.py new file mode 100644 index 0000000..417742b --- /dev/null +++ b/src/scouter/llmcore/exceptions.py @@ -0,0 +1,10 @@ +class LLMError(Exception): + """Base exception for LLM related errors.""" + + +class ToolExecutionError(LLMError): + """Raised when a tool fails to execute.""" + + +class AgentError(LLMError): + """Raised when agent operations fail.""" diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py new file mode 100644 index 0000000..f592fed --- /dev/null +++ b/src/scouter/llmcore/tools.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +import asyncio +import inspect +import json +from collections.abc import Callable # noqa: TC003 +from typing import TYPE_CHECKING, Any, get_origin + +from pydantic import BaseModel, Field + +from .exceptions import ToolExecutionError + +if TYPE_CHECKING: + from .types import ChatCompletionToolParam + + +class Tool(BaseModel): + name: str + description: str + handler: Callable[[BaseModel], BaseModel | str] + + # Auto-filled fields + parameters_schema: dict = Field(default_factory=dict) + output_schema: dict = Field(default_factory=dict) + description_with_output: str = "" + + # Internal: Store the actual class types for runtime conversion + input_type: type[BaseModel] | None = None + + def model_post_init(self, /, __context) -> None: + # 1. Extract input model from handler signature + sig = inspect.signature(self.handler) + if not sig.parameters: + msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)." 
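+            # No parameters means there is no Pydantic model to derive the
+            # JSON schema from, so the definition is rejected up front.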
+ raise TypeError(msg) + + param = next(iter(sig.parameters.values())) + input_model = param.annotation + + origin = get_origin(input_model) or input_model + if not (isinstance(origin, type) and issubclass(origin, BaseModel)): + msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}" + raise TypeError(msg) + + self.input_type = origin # SAVE THIS for execute_tool + + # 2. Extract return type + return_type = sig.return_annotation + return_origin = get_origin(return_type) or return_type + if return_origin is str: + pass # Allow str + elif isinstance(return_origin, type) and issubclass(return_origin, BaseModel): + pass # Allow BaseModel + else: + msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str" + raise TypeError(msg) + + # 3. Auto-fill everything + self.parameters_schema = origin.model_json_schema() + if return_origin is str: + self.output_schema = {"type": "string"} + else: + self.output_schema = return_origin.model_json_schema() # type: ignore[reportAttributeAccessIssue] + + # 4. Enrich description with pretty-printed output schema + if return_origin is str: + self.description_with_output = ( + f"{self.description}\n\nThe tool will **always return a string**." + ) + else: + pretty_output = json.dumps(self.output_schema, indent=2) + self.description_with_output = ( + f"{self.description}\n\n" + f"The tool will **always return JSON matching this exact schema**:\n" + f"```json\n{pretty_output}\n```" + ) + + def openai_tool_spec(self) -> ChatCompletionToolParam: + return { + "type": "function", + "function": { + "name": self.name, + "description": self.description_with_output, + "parameters": self.parameters_schema, + }, + } + + +def create_tool( + name: str, description: str, handler: Callable[[BaseModel], BaseModel | str] +) -> Tool: + """ + Creates a Pydantic Tool instance. + """ + return Tool(name=name, description=description, handler=handler) + + +def tool(name: str | None = None, description: str | None = None): + """ + Decorator to create and register a Pydantic-based tool. + The decorated function MUST take a Pydantic model and return a Pydantic model or a string. + """ + + def decorator(func: Callable[[BaseModel], BaseModel | str]): + tool_name = name or func.__name__ + tool_desc = description or (func.__doc__ or "No description.").strip() + + # Create the Tool instance + t = create_tool(tool_name, tool_desc, func) + + # Register it + register_tool(t) + + return func + + return decorator + + +def run_tool(name: str, raw_args: dict[str, Any]) -> str: + """ + Looks up a tool by name and executes it. + """ + tool_instance = lookup_tool(name) + return execute_tool(tool_instance, raw_args) + + +def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str: + """ + Executes a Pydantic Tool. + 1. Converts raw_args (dict) -> InputModel (Pydantic). + 2. Calls handler(InputModel). + 3. Gets OutputModel or str. + 4. Returns OutputModel.model_dump_json() or the str. + """ + try: + # 1. Instantiate the specific input model + input_model_cls = tool_instance.input_type + assert input_model_cls is not None + input_obj = input_model_cls(**raw_args) + + # 2. Call Handler + handler = tool_instance.handler + + if inspect.iscoroutinefunction(handler): + result_model = asyncio.run(handler(input_obj)) + else: + result_model = handler(input_obj) + + # 3. Validate Return + if not isinstance(result_model, (BaseModel, str)): + msg = f"Tool '{tool_instance.name}' handler did not return a Pydantic model or str." 
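+            # Raised inside the try on purpose: the except below rewraps
+            # every failure mode (bad args, handler errors, bad return
+            # types) as ToolExecutionError, hence the TRY301 suppression.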
+ raise ToolExecutionError(msg) # noqa: TRY301 + + # 4. Serialize Output + if isinstance(result_model, str): + return result_model + return result_model.model_dump_json() + + except Exception as e: + msg = f"Error executing tool '{tool_instance.name}': {e!s}" + raise ToolExecutionError(msg) from e + + +# Global registry stores Tool instances +TOOL_REGISTRY: dict[str, Tool] = {} + + +def register_tool(tool_instance: Tool) -> None: + """ + Registers a Tool object in the global registry. + """ + if not tool_instance.name: + msg = "Cannot register tool without a name." + raise ToolExecutionError(msg) + TOOL_REGISTRY[tool_instance.name] = tool_instance + + +def lookup_tool(name: str) -> Tool: + """ + Retrieves a Tool object from the global registry. + """ + if name not in TOOL_REGISTRY: + msg = f"Tool '{name}' not found in registry." + raise ToolExecutionError(msg) + return TOOL_REGISTRY[name] diff --git a/src/scouter/llmcore/types.py b/src/scouter/llmcore/types.py new file mode 100644 index 0000000..6b22afb --- /dev/null +++ b/src/scouter/llmcore/types.py @@ -0,0 +1,26 @@ +# Re-export OpenAI types +from openai.types.chat import ( + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionToolParam, + ChatCompletionUserMessageParam, +) +from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, +) + +__all__ = [ + "ChatCompletion", + "ChatCompletionAssistantMessageParam", + "ChatCompletionMessage", + "ChatCompletionMessageParam", + "ChatCompletionMessageToolCall", + "ChatCompletionSystemMessageParam", + "ChatCompletionToolMessageParam", + "ChatCompletionToolParam", + "ChatCompletionUserMessageParam", +] diff --git a/src/scouter/llmcore/utils.py b/src/scouter/llmcore/utils.py new file mode 100644 index 0000000..8d53808 --- /dev/null +++ b/src/scouter/llmcore/utils.py @@ -0,0 +1,24 @@ +import random +import time + +from openai import APIError, RateLimitError + +ERROR_MAX_RETRY = "max retries exceeded" + + +def retry_loop(func, max_retries=5, base_delay=1): + last_exception: BaseException | None = None + + for attempt in range(max_retries): + try: + return func() + except (RateLimitError, APIError) as e: # noqa: PERF203 + last_exception = e + if attempt == max_retries - 1: + break + + sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 + time.sleep(sleep_time) + + # If we reach here, all retries failed + raise last_exception or RuntimeError(ERROR_MAX_RETRY) From 09578310bc0b3b5f7a33986c0718a96952557e4f Mon Sep 17 00:00:00 2001 From: farhoud Date: Wed, 10 Dec 2025 19:20:45 +0330 Subject: [PATCH 8/8] Remove old llm module files --- src/scouter/llm/__init__.py | 0 src/scouter/llm/agent.py | 56 ---------- src/scouter/llm/client.py | 55 ---------- src/scouter/llm/exceptions.py | 10 -- src/scouter/llm/tools.py | 185 ---------------------------------- src/scouter/llm/types.py | 26 ----- src/scouter/llm/utils.py | 24 ----- 7 files changed, 356 deletions(-) delete mode 100644 src/scouter/llm/__init__.py delete mode 100644 src/scouter/llm/agent.py delete mode 100644 src/scouter/llm/client.py delete mode 100644 src/scouter/llm/exceptions.py delete mode 100644 src/scouter/llm/tools.py delete mode 100644 src/scouter/llm/types.py delete mode 100644 src/scouter/llm/utils.py diff --git a/src/scouter/llm/__init__.py b/src/scouter/llm/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git 
a/src/scouter/llm/agent.py b/src/scouter/llm/agent.py deleted file mode 100644 index 777f97e..0000000 --- a/src/scouter/llm/agent.py +++ /dev/null @@ -1,56 +0,0 @@ -import json -from collections.abc import Iterable -from typing import TYPE_CHECKING, cast - -from openai.types.chat import ( - ChatCompletionMessage, - ChatCompletionMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionToolUnionParam, -) - -if TYPE_CHECKING: - from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - ) - -from .client import call_llm -from .tools import run_tool - - -def create_agent( - model: str = "gpt-5.1-mini", - tools: Iterable[ChatCompletionToolUnionParam] | None = None, -): - """ - Returns a function that acts as a functional agent with its own tools and model. - """ - - def agent( - messages: list[ChatCompletionMessageParam], max_steps: int = 5 - ) -> ChatCompletionMessage: - steps = 0 - while steps < max_steps: - msg: ChatCompletionMessage = call_llm(model, messages, tools) - messages.append(cast("ChatCompletionMessageParam", msg)) - - # Handle tool calls - if msg.tool_calls: - for tc in msg.tool_calls: - tc = cast("ChatCompletionMessageToolCall", tc) - args = json.loads(tc.function.arguments) - output = run_tool(tc.function.name, args) - messages.append( - cast( - "ChatCompletionMessageParam", - ChatCompletionToolMessageParam( - role="tool", content=output, tool_call_id=tc.id - ), - ) - ) - else: - break - steps += 1 - return cast("ChatCompletionMessage", messages[-1]) - - return agent diff --git a/src/scouter/llm/client.py b/src/scouter/llm/client.py deleted file mode 100644 index 01821cc..0000000 --- a/src/scouter/llm/client.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -from collections.abc import Iterable -from dataclasses import dataclass - -from openai import OpenAI -from openai.types.chat import ( - ChatCompletionMessage, - ChatCompletionMessageParam, - ChatCompletionToolUnionParam, -) - -from .utils import retry_loop - - -@dataclass(slots=True) -class LLMConfig: - api_key: str | None = None - base_url: str | None = None - timeout: int = 30 - max_retries: int = 3 - - @staticmethod - def load_from_env() -> "LLMConfig": - return LLMConfig( - api_key=os.getenv("OPENAI_API_KEY"), - base_url=os.getenv("OPENAI_BASE_URL"), - ) - - -def create_llm_client(cfg: LLMConfig | None = None) -> OpenAI: - cfg = cfg or LLMConfig.load_from_env() - - return OpenAI( - api_key=cfg.api_key, - base_url=cfg.base_url, - timeout=cfg.timeout, - max_retries=cfg.max_retries, - ) - - -client = create_llm_client() - - -def call_llm( - model: str, - messages: list[ChatCompletionMessageParam], - tools: Iterable[ChatCompletionToolUnionParam] | None = None, -) -> ChatCompletionMessage: - def _call(): - return client.chat.completions.create( - model=model, messages=messages, tools=tools or [] - ) - - res = retry_loop(_call) - return res.choices[0].message diff --git a/src/scouter/llm/exceptions.py b/src/scouter/llm/exceptions.py deleted file mode 100644 index 417742b..0000000 --- a/src/scouter/llm/exceptions.py +++ /dev/null @@ -1,10 +0,0 @@ -class LLMError(Exception): - """Base exception for LLM related errors.""" - - -class ToolExecutionError(LLMError): - """Raised when a tool fails to execute.""" - - -class AgentError(LLMError): - """Raised when agent operations fail.""" diff --git a/src/scouter/llm/tools.py b/src/scouter/llm/tools.py deleted file mode 100644 index f592fed..0000000 --- a/src/scouter/llm/tools.py +++ /dev/null @@ -1,185 +0,0 @@ -from __future__ import 
annotations - -import asyncio -import inspect -import json -from collections.abc import Callable # noqa: TC003 -from typing import TYPE_CHECKING, Any, get_origin - -from pydantic import BaseModel, Field - -from .exceptions import ToolExecutionError - -if TYPE_CHECKING: - from .types import ChatCompletionToolParam - - -class Tool(BaseModel): - name: str - description: str - handler: Callable[[BaseModel], BaseModel | str] - - # Auto-filled fields - parameters_schema: dict = Field(default_factory=dict) - output_schema: dict = Field(default_factory=dict) - description_with_output: str = "" - - # Internal: Store the actual class types for runtime conversion - input_type: type[BaseModel] | None = None - - def model_post_init(self, /, __context) -> None: - # 1. Extract input model from handler signature - sig = inspect.signature(self.handler) - if not sig.parameters: - msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)." - raise TypeError(msg) - - param = next(iter(sig.parameters.values())) - input_model = param.annotation - - origin = get_origin(input_model) or input_model - if not (isinstance(origin, type) and issubclass(origin, BaseModel)): - msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}" - raise TypeError(msg) - - self.input_type = origin # SAVE THIS for execute_tool - - # 2. Extract return type - return_type = sig.return_annotation - return_origin = get_origin(return_type) or return_type - if return_origin is str: - pass # Allow str - elif isinstance(return_origin, type) and issubclass(return_origin, BaseModel): - pass # Allow BaseModel - else: - msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str" - raise TypeError(msg) - - # 3. Auto-fill everything - self.parameters_schema = origin.model_json_schema() - if return_origin is str: - self.output_schema = {"type": "string"} - else: - self.output_schema = return_origin.model_json_schema() # type: ignore[reportAttributeAccessIssue] - - # 4. Enrich description with pretty-printed output schema - if return_origin is str: - self.description_with_output = ( - f"{self.description}\n\nThe tool will **always return a string**." - ) - else: - pretty_output = json.dumps(self.output_schema, indent=2) - self.description_with_output = ( - f"{self.description}\n\n" - f"The tool will **always return JSON matching this exact schema**:\n" - f"```json\n{pretty_output}\n```" - ) - - def openai_tool_spec(self) -> ChatCompletionToolParam: - return { - "type": "function", - "function": { - "name": self.name, - "description": self.description_with_output, - "parameters": self.parameters_schema, - }, - } - - -def create_tool( - name: str, description: str, handler: Callable[[BaseModel], BaseModel | str] -) -> Tool: - """ - Creates a Pydantic Tool instance. - """ - return Tool(name=name, description=description, handler=handler) - - -def tool(name: str | None = None, description: str | None = None): - """ - Decorator to create and register a Pydantic-based tool. - The decorated function MUST take a Pydantic model and return a Pydantic model or a string. 
- """ - - def decorator(func: Callable[[BaseModel], BaseModel | str]): - tool_name = name or func.__name__ - tool_desc = description or (func.__doc__ or "No description.").strip() - - # Create the Tool instance - t = create_tool(tool_name, tool_desc, func) - - # Register it - register_tool(t) - - return func - - return decorator - - -def run_tool(name: str, raw_args: dict[str, Any]) -> str: - """ - Looks up a tool by name and executes it. - """ - tool_instance = lookup_tool(name) - return execute_tool(tool_instance, raw_args) - - -def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str: - """ - Executes a Pydantic Tool. - 1. Converts raw_args (dict) -> InputModel (Pydantic). - 2. Calls handler(InputModel). - 3. Gets OutputModel or str. - 4. Returns OutputModel.model_dump_json() or the str. - """ - try: - # 1. Instantiate the specific input model - input_model_cls = tool_instance.input_type - assert input_model_cls is not None - input_obj = input_model_cls(**raw_args) - - # 2. Call Handler - handler = tool_instance.handler - - if inspect.iscoroutinefunction(handler): - result_model = asyncio.run(handler(input_obj)) - else: - result_model = handler(input_obj) - - # 3. Validate Return - if not isinstance(result_model, (BaseModel, str)): - msg = f"Tool '{tool_instance.name}' handler did not return a Pydantic model or str." - raise ToolExecutionError(msg) # noqa: TRY301 - - # 4. Serialize Output - if isinstance(result_model, str): - return result_model - return result_model.model_dump_json() - - except Exception as e: - msg = f"Error executing tool '{tool_instance.name}': {e!s}" - raise ToolExecutionError(msg) from e - - -# Global registry stores Tool instances -TOOL_REGISTRY: dict[str, Tool] = {} - - -def register_tool(tool_instance: Tool) -> None: - """ - Registers a Tool object in the global registry. - """ - if not tool_instance.name: - msg = "Cannot register tool without a name." - raise ToolExecutionError(msg) - TOOL_REGISTRY[tool_instance.name] = tool_instance - - -def lookup_tool(name: str) -> Tool: - """ - Retrieves a Tool object from the global registry. - """ - if name not in TOOL_REGISTRY: - msg = f"Tool '{name}' not found in registry." 
- raise ToolExecutionError(msg) - return TOOL_REGISTRY[name] diff --git a/src/scouter/llm/types.py b/src/scouter/llm/types.py deleted file mode 100644 index 6b22afb..0000000 --- a/src/scouter/llm/types.py +++ /dev/null @@ -1,26 +0,0 @@ -# Re-export OpenAI types -from openai.types.chat import ( - ChatCompletion, - ChatCompletionAssistantMessageParam, - ChatCompletionMessage, - ChatCompletionMessageParam, - ChatCompletionSystemMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionToolParam, - ChatCompletionUserMessageParam, -) -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, -) - -__all__ = [ - "ChatCompletion", - "ChatCompletionAssistantMessageParam", - "ChatCompletionMessage", - "ChatCompletionMessageParam", - "ChatCompletionMessageToolCall", - "ChatCompletionSystemMessageParam", - "ChatCompletionToolMessageParam", - "ChatCompletionToolParam", - "ChatCompletionUserMessageParam", -] diff --git a/src/scouter/llm/utils.py b/src/scouter/llm/utils.py deleted file mode 100644 index 8d53808..0000000 --- a/src/scouter/llm/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -import random -import time - -from openai import APIError, RateLimitError - -ERROR_MAX_RETRY = "max retries exceeded" - - -def retry_loop(func, max_retries=5, base_delay=1): - last_exception: BaseException | None = None - - for attempt in range(max_retries): - try: - return func() - except (RateLimitError, APIError) as e: # noqa: PERF203 - last_exception = e - if attempt == max_retries - 1: - break - - sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 - time.sleep(sleep_time) - - # If we reach here, all retries failed - raise last_exception or RuntimeError(ERROR_MAX_RETRY)
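---

Taken together, PATCHES 7/8 and 8/8 swap the old `scouter.llm` module for `scouter.llmcore`. The following is a minimal usage sketch of the new API, not part of the patch series itself: the `CityInput` model, the `get_timezone` tool, and the prompt are invented for illustration, `gpt-4o-mini` is simply `run_agent`'s default model, and the module-level client in `llmcore/client.py` expects `OPENAI_API_KEY` (and optionally `OPENAI_BASE_URL`) in the environment.

```python
# Minimal usage sketch for scouter.llmcore (illustrative, not from the
# patches): CityInput, get_timezone, and the prompt are invented here.
from pydantic import BaseModel

from scouter.llmcore import (
    AgentRun,
    ChatCompletionUserMessageParam,
    lookup_tool,
    run_agent,
    tool,
)
from scouter.llmcore.agent import (  # not re-exported from the package root
    InputStep,
    LLMStep,
    default_continue_condition_factory,
)


class CityInput(BaseModel):
    city: str


@tool(name="get_timezone", description="Look up the IANA timezone for a city.")
def get_timezone(params: CityInput) -> str:
    # Illustrative stub; a real handler would consult a timezone database.
    return {"tehran": "Asia/Tehran", "berlin": "Europe/Berlin"}.get(
        params.city.lower(), "UTC"
    )


# Bound the run to three LLM calls instead of the unbounded default.
run = AgentRun(continue_condition=default_continue_condition_factory(max_steps=3))
run.add_step(
    InputStep(
        ChatCompletionUserMessageParam(
            role="user", content="What timezone is Tehran in?"
        )
    )
)

# The @tool decorator registered get_timezone globally; its OpenAI spec is
# built from the Pydantic schemas captured in model_post_init.
run_agent(
    run,
    model="gpt-4o-mini",
    tools=[lookup_tool("get_timezone").openai_tool_spec()],
)

final = run.steps[-1]
if isinstance(final, LLMStep):
    print(final.completion.choices[0].message.content)
print(run.total_usage)  # token counts aggregated across all LLM steps
for t in run.tool_executions:
    print(t.tool_name, t.args, f"{t.execution_time:.3f}s", t.success)
```

Because every completion and tool call is recorded as a typed step on `AgentRun`, token accounting (`total_usage`) and tool timing (`tool_executions`) come straight from the run record, with no separate instrumentation. Note that `InputStep`, `LLMStep`, and `default_continue_condition_factory` live in `llmcore/agent.py` but are not listed in the package `__init__.py`, so the sketch imports them from the submodule.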