diff --git a/README.md b/README.md
index 2fb527e..fb21aba 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,11 @@ Rapid assessment and retrieval from knowledge graph using Neo4j GraphRAG.
 
 ## Overview
 
-Scouter is a knowledge graph-based document retrieval system that:
+Scouter is a knowledge graph-based document retrieval system focused on MCP (Model Context Protocol) for agentic search:
 
 - Ingests PDFs and text documents using Neo4j GraphRAG's SimpleKGPipeline
-- Provides fast semantic search with relevance scoring
-- Supports both API and MCP (Model Context Protocol) interfaces
+- Provides agentic semantic search via MCP for LLM integration
+- Includes REST API for document ingestion
 - Includes evaluation framework for retrieval quality assessment
 
 ## Quick Start
@@ -62,31 +62,25 @@
 curl -X POST "http://localhost:8000/v1/ingest" \
   -d '{"text": "Your document content", "metadata": {"source": "api"}}'
 ```
 
-### Search
-
-```bash
-# Search documents
-curl "http://localhost:8000/v1/search?query=your%20search%20term&limit=5"
-```
-
 ### Interactive API
 
 Visit <http://localhost:8000/docs> for interactive API documentation.
 
+**Note:** Search functionality is provided via MCP (Model Context Protocol) for agentic retrieval; a direct REST search API is not available.
+
 ## Architecture
 
 ### Components
 
 - **Ingestion Service**: Processes PDFs/text into knowledge graph using SimpleKGPipeline
-- **Search Service**: Performs semantic search with relevance scoring
-- **MCP Server**: Provides Model Context Protocol interface for LLM integration
+- **MCP Server**: Core component providing agentic search via Model Context Protocol for LLM integration
 - **Celery Workers**: Handle async document processing
 - **Redis**: Task queue and caching
 
 ### Data Flow
 
 1. Documents → Ingestion API → Celery Queue → Neo4j GraphRAG
-2. Search Query → Search API → Neo4j → Ranked Results
+2. Search Query → MCP Server → Agentic Search → Neo4j → Ranked Results
 
 ## Development
@@ -143,24 +137,26 @@ The project uses Neo4j with APOC plugin for enhanced graph procedures. Docker se
 
 ## Examples
 
-### RAG Chatbot
+### MCP Integration (Primary Use Case)
 
 ```bash
-cd examples/chatbot
-python chatbot.py
+# Start MCP server
+python -m scouter.agent.mcp
+
+# Use with Claude Desktop or other MCP-compatible tools
 ```
 
-Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation.
+Scouter's MCP server enables agentic search for LLMs, providing semantic retrieval from the knowledge graph.
 
-### MCP Integration
+### RAG Chatbot
 
 ```bash
-# Start MCP server
-python -m scouter_app.agent.mcp
-
-# Use with Claude Desktop or other MCP-compatible tools
+cd examples/chatbot
+python chatbot.py
 ```
 
+Interactive chatbot that uses Scouter for retrieval and OpenRouter for generation.
+
 ## Project Structure
 
 ```
diff --git a/app_main.py b/app_main.py
index 4ca641e..b5e3713 100644
--- a/app_main.py
+++ b/app_main.py
@@ -4,9 +4,9 @@
 
 from fastapi import FastAPI
 
-from src.scouter_app.agent.mcp import app as mcp_app
-from src.scouter_app.config.llm import get_client_config
-from src.scouter_app.ingestion.api import router as ingestion_router
+from src.scouter.agent.mcp import app as mcp_app
+from src.scouter.config.llm import get_client_config
+from src.scouter.ingestion.api import router as ingestion_router
 
 logger = logging.getLogger(__name__)
 
diff --git a/evals/conftest.py b/evals/conftest.py
index 671946a..1b18da7 100644
--- a/evals/conftest.py
+++ b/evals/conftest.py
@@ -10,7 +10,7 @@
 
 import pytest
 
-from scouter_app.ingestion.service import IngestionService
+from scouter.ingestion.service import IngestionService
 
 from .utils import create_light_subset
 
diff --git a/evals/test_retrieval_relevancy.py b/evals/test_retrieval_relevancy.py
index a8dca75..a82ed91 100644
--- a/evals/test_retrieval_relevancy.py
+++ b/evals/test_retrieval_relevancy.py
@@ -3,6 +3,7 @@
 from deepeval.test_case import LLMTestCase
 
 from examples.chatbot.chatbot import chat_with_rag
+
 from .utils import OpenRouterLLM
 
 THRESHOLD = 0.5
diff --git a/examples/chatbot/chatbot.py b/examples/chatbot/chatbot.py
index 787161b..6768628 100644
--- a/examples/chatbot/chatbot.py
+++ b/examples/chatbot/chatbot.py
@@ -6,7 +6,7 @@
 from mcp import ClientSession
 from mcp.client.stdio import StdioServerParameters, stdio_client
 
-from scouter_app.config.llm import (
+from scouter.config.llm import (
     DEFAULT_MODEL,
     call_with_rate_limit,
     get_chatbot_client,
@@ -32,8 +32,6 @@ async def chat_with_rag(query: str) -> str:
 
         mcp_tools = await session.list_tools()
 
-        print(mcp_tools)
-
         # Convert MCP tools to OpenAI format
         openai_tools = [
             {
@@ -72,15 +70,15 @@ async def chat_with_rag(query: str) -> str:
                 tool_args = json.loads(tool_call.function.arguments)
                 result = await session.call_tool(tool_name, tool_args)
                 # Add to messages
-                messages.append(  # type: ignore
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]}  # type: ignore
+                messages.append(  # type: ignore  # noqa: PGH003
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]}  # type: ignore  # noqa: PGH003
                 )
-                messages.append(  # type: ignore
+                messages.append(  # type: ignore  # noqa: PGH003
                     {
                         "role": "tool",
                         "content": str(result),
                         "tool_call_id": tool_call.id,
-                    }  # type: ignore
+                    }  # type: ignore  # noqa: PGH003
                 )
 
             # Call LLM again with updated messages
diff --git a/pyproject.toml b/pyproject.toml
index f464e9a..0ab3e42 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,9 @@ asyncio_mode = "auto"
 [tool.ruff]
 extend = "ruff.toml"
 
+[tool.hatch.build.targets.wheel]
+packages = ["src/scouter"]
+
 [dependency-groups]
 dev = [
     "pre-commit>=4.5.0",
diff --git a/src/scouter_app/__init__.py b/src/scouter/__init__.py
similarity index 100%
rename from src/scouter_app/__init__.py
rename to src/scouter/__init__.py
diff --git a/src/scouter_app/agent/__init__.py b/src/scouter/agent/__init__.py
similarity index 100%
rename from src/scouter_app/agent/__init__.py
rename to src/scouter/agent/__init__.py
diff --git a/src/scouter_app/agent/agent.py b/src/scouter/agent/agent.py
similarity index 96%
rename from src/scouter_app/agent/agent.py
rename to src/scouter/agent/agent.py
index 3f1addf..d3e1861 100644
--- a/src/scouter_app/agent/agent.py
+++ b/src/scouter/agent/agent.py
@@ -1,7 +1,7 @@
 import json
 
-from scouter_app.agent.tools import get_tools
-from scouter_app.config.llm import (
+from scouter.agent.tools import get_tools
+from scouter.config.llm import (
     DEFAULT_MODEL,
     call_with_rate_limit,
     get_scouter_client,
diff --git a/src/scouter_app/agent/mcp.py b/src/scouter/agent/mcp.py
similarity index 100%
rename from src/scouter_app/agent/mcp.py
rename to src/scouter/agent/mcp.py
diff --git a/src/scouter_app/agent/tools.py b/src/scouter/agent/tools.py
similarity index 92%
rename from src/scouter_app/agent/tools.py
rename to src/scouter/agent/tools.py
index ea3cf41..f73b8fd 100644
--- a/src/scouter_app/agent/tools.py
+++ b/src/scouter/agent/tools.py
@@ -3,8 +3,8 @@
 from neo4j_graphrag.retrievers import VectorRetriever
 from pydantic import BaseModel, Field
 
-from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder
-from scouter_app.shared.domain_models import VectorSearchResult
+from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder
+from scouter.shared.domain_models import VectorSearchResult
 
 
 class SemanticSearchParams(BaseModel):
diff --git a/src/scouter_app/config/__init__.py b/src/scouter/config/__init__.py
similarity index 100%
rename from src/scouter_app/config/__init__.py
rename to src/scouter/config/__init__.py
diff --git a/src/scouter_app/config/llm.py b/src/scouter/config/llm.py
similarity index 98%
rename from src/scouter_app/config/llm.py
rename to src/scouter/config/llm.py
index c4d8217..ca0d95f 100644
--- a/src/scouter_app/config/llm.py
+++ b/src/scouter/config/llm.py
@@ -93,7 +93,7 @@ def get_neo4j_embedder() -> SentenceTransformerEmbeddings:
 def call_with_rate_limit(client: openai.OpenAI, **kwargs):
     """Call OpenAI client with rate limit handling."""
     max_retries = 5
-    for attempt in range(max_retries):  # noqa: PERF203
+    for attempt in range(max_retries):
         try:
             return client.chat.completions.create(**kwargs)
         except openai.RateLimitError:  # noqa: PERF203
diff --git a/src/scouter_app/ingestion/__init__.py b/src/scouter/ingestion/__init__.py
similarity index 100%
rename from src/scouter_app/ingestion/__init__.py
rename to src/scouter/ingestion/__init__.py
diff --git a/src/scouter_app/ingestion/api.py b/src/scouter/ingestion/api.py
similarity index 90%
rename from src/scouter_app/ingestion/api.py
rename to src/scouter/ingestion/api.py
index 0d2a960..72c3118 100644
--- a/src/scouter_app/ingestion/api.py
+++ b/src/scouter/ingestion/api.py
@@ -5,9 +5,9 @@
 
 from fastapi import APIRouter, Form, UploadFile
 
-from scouter_app.config.llm import get_client_config
-from scouter_app.ingestion.tasks import process_document_task
-from scouter_app.shared.domain_models import IngestResponse
+from scouter.config.llm import get_client_config
+from scouter.ingestion.tasks import process_document_task
+from scouter.shared.domain_models import IngestResponse
 
 router = APIRouter()
diff --git a/src/scouter_app/ingestion/service.py b/src/scouter/ingestion/service.py
similarity index 96%
rename from src/scouter_app/ingestion/service.py
rename to src/scouter/ingestion/service.py
index 3d930c4..ef3c475 100644
--- a/src/scouter_app/ingestion/service.py
+++ b/src/scouter/ingestion/service.py
@@ -4,7 +4,7 @@
 
 from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
 
-from scouter_app.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm
+from scouter.config.llm import get_neo4j_driver, get_neo4j_embedder, get_neo4j_llm
 
 
 class IngestionService:
diff --git a/src/scouter_app/ingestion/tasks.py b/src/scouter/ingestion/tasks.py
similarity index 93%
rename from src/scouter_app/ingestion/tasks.py
rename to src/scouter/ingestion/tasks.py
index 5587491..21ba09d 100644
--- a/src/scouter_app/ingestion/tasks.py
+++ b/src/scouter/ingestion/tasks.py
@@ -7,10 +7,10 @@
 
 from celery import Celery
 
-from scouter_app.ingestion.service import IngestionService
+from scouter.ingestion.service import IngestionService
 
 app = Celery(
-    "scouter_app.ingestion.tasks",
+    "scouter.ingestion.tasks",
     broker=os.getenv("REDIS_URL", "redis://localhost:6379/0"),
 )
diff --git a/src/scouter/llmcore/__init__.py b/src/scouter/llmcore/__init__.py
new file mode 100644
index 0000000..d190412
--- /dev/null
+++ b/src/scouter/llmcore/__init__.py
@@ -0,0 +1,53 @@
+from .agent import AgentRun, run_agent
+from .client import ChatCompletionOptions, LLMConfig, call_llm, create_llm_client
+from .exceptions import AgentError, LLMError, ToolExecutionError
+from .tools import (
+    Tool,
+    create_tool,
+    execute_tool,
+    lookup_tool,
+    register_tool,
+    run_tool,
+    tool,
+)
+from .types import (
+    ChatCompletion,
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessage,
+    ChatCompletionMessageParam,
+    ChatCompletionMessageToolCall,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionToolParam,
+    ChatCompletionUserMessageParam,
+)
+from .utils import retry_loop
+
+__all__ = [
+    "AgentError",
+    "AgentRun",
+    "ChatCompletion",
+    "ChatCompletionAssistantMessageParam",
+    "ChatCompletionMessage",
+    "ChatCompletionMessageParam",
+    "ChatCompletionMessageToolCall",
+    "ChatCompletionOptions",
+    "ChatCompletionSystemMessageParam",
+    "ChatCompletionToolMessageParam",
+    "ChatCompletionToolParam",
+    "ChatCompletionUserMessageParam",
+    "LLMConfig",
+    "LLMError",
+    "Tool",
+    "ToolExecutionError",
+    "call_llm",
+    "create_llm_client",
+    "create_tool",
+    "execute_tool",
+    "lookup_tool",
+    "register_tool",
+    "retry_loop",
+    "run_agent",
+    "run_tool",
+    "tool",
+]
diff --git a/src/scouter/llmcore/agent.py b/src/scouter/llmcore/agent.py
new file mode 100644
index 0000000..246a14e
--- /dev/null
+++ b/src/scouter/llmcore/agent.py
@@ -0,0 +1,164 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from time import time
+from typing import TYPE_CHECKING, cast
+
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionToolUnionParam,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable
+
+    from openai.types.chat.chat_completion_message_tool_call import (
+        ChatCompletionMessageToolCall,
+    )
+
+from .client import ChatCompletionOptions, call_llm
+from .tools import run_tool
+
+
+@dataclass
+class InputStep:
+    message: ChatCompletionMessageParam
+
+
+@dataclass
+class LLMStep:
+    completion: ChatCompletion
+
+    @property
+    def message(self) -> ChatCompletionMessageParam:
+        return cast("ChatCompletionMessageParam", self.completion.choices[0].message)
+
+
+@dataclass
+class ToolStep:
+    tool_call_id: str
+    tool_name: str
+    args: dict
+    output: str
+    execution_time: float
+    success: bool
+    error_message: str | None
+
+    @property
+    def message(self) -> ChatCompletionToolMessageParam:
+        return ChatCompletionToolMessageParam(
+            role="tool", content=self.output, tool_call_id=self.tool_call_id
+        )
+
+
+Step = InputStep | LLMStep | ToolStep
+
+
+@dataclass
+class AgentRun:
+    continue_condition: Callable[[AgentRun], bool] = field(
+        default_factory=lambda: default_continue_condition_factory()
+    )
+    steps: list[Step] = field(default_factory=list)
+
+    def add_step(self, step: Step) -> None:
+        """Add a step to the run."""
+        self.steps.append(step)
+
+    @property
+    def conversation_history(self) -> list[ChatCompletionMessageParam]:
+        return [step.message for step in self.steps]
+
+    @property
+    def total_usage(
+        self,
+    ) -> dict:  # Simplified, can make proper ChatCompletionUsage later
+        total = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}
+        for step in self.steps:
+            if isinstance(step, LLMStep) and step.completion.usage:
+                usage = step.completion.usage
+                total["completion_tokens"] += usage.completion_tokens or 0
+                total["prompt_tokens"] += usage.prompt_tokens or 0
+                total["total_tokens"] += usage.total_tokens or 0
+        return total
+
+    @property
+    def last_output(self) -> str:
+        if not self.steps:
+            msg = "No steps in run"
+            raise ValueError(msg)
+        last_step = self.steps[-1]
+        if isinstance(last_step, LLMStep):
+            content = last_step.completion.choices[0].message.content
+            return content if isinstance(content, str) else ""
+        if isinstance(last_step, ToolStep):
+            return last_step.output
+        return ""
+
+    @property
+    def tool_executions(self) -> list[ToolStep]:
+        return [step for step in self.steps if isinstance(step, ToolStep)]
+
+
+def default_continue_condition_factory(
+    max_steps: int | None = None,
+) -> Callable[[AgentRun], bool]:
+    def condition(run: AgentRun) -> bool:
+        if max_steps is not None:
+            llm_count = sum(1 for step in run.steps if isinstance(step, LLMStep))
+            if llm_count >= max_steps:
+                return False
+        # Filter out InputStep to find the last meaningful step
+        non_input_steps = [
+            step for step in run.steps if not isinstance(step, InputStep)
+        ]
+        if not non_input_steps:
+            return True  # Only InputSteps present, initial state
+        last_non_input = non_input_steps[-1]
+        return isinstance(last_non_input, ToolStep)
+
+    return condition
+
+
+def run_agent(
+    run: AgentRun,
+    model: str = "gpt-4o-mini",
+    tools: Iterable[ChatCompletionToolUnionParam] | None = None,
+    options: ChatCompletionOptions | None = None,
+) -> None:
+    while run.continue_condition(run):
+        completion: ChatCompletion = call_llm(
+            model, run.conversation_history, tools, options
+        )
+        msg = completion.choices[0].message
+        run.add_step(LLMStep(completion))
+
+        # Handle tool calls
+        if msg.tool_calls:
+            for tc in msg.tool_calls:
+                tc = cast("ChatCompletionMessageToolCall", tc)
+                args = json.loads(tc.function.arguments)
+                start = time()
+                try:
+                    output = run_tool(tc.function.name, args)
+                    success = True
+                    error = None
+                except Exception as e:  # noqa: BLE001
+                    output = ""
+                    success = False
+                    error = str(e)
+                end = time()
+                run.add_step(
+                    ToolStep(
+                        tc.id,
+                        tc.function.name,
+                        args,
+                        output,
+                        end - start,
+                        success,
+                        error,
+                    )
+                )
diff --git a/src/scouter/llmcore/client.py b/src/scouter/llmcore/client.py
new file mode 100644
index 0000000..8944e18
--- /dev/null
+++ b/src/scouter/llmcore/client.py
@@ -0,0 +1,86 @@
+import os
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import TypedDict
+
+from openai import OpenAI
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionMessageParam,
+    ChatCompletionToolUnionParam,
+)
+
+from .utils import retry_loop
+
+
+class ChatCompletionOptions(TypedDict, total=False):
+    """Options for ChatCompletion API calls.
+
+    Attributes:
+        max_tokens: Maximum number of tokens to generate.
+        temperature: Sampling temperature (0.0 to 2.0).
+        top_p: Nucleus sampling parameter.
+        frequency_penalty: Frequency penalty (-2.0 to 2.0).
+        presence_penalty: Presence penalty (-2.0 to 2.0).
+        stop: List of stop sequences.
+ """ + + max_tokens: int + temperature: float + top_p: float + frequency_penalty: float + presence_penalty: float + stop: list[str] + + +@dataclass(slots=True) +class LLMConfig: + api_key: str | None = None + base_url: str | None = None + timeout: int = 30 + max_retries: int = 3 + + @staticmethod + def load_from_env() -> "LLMConfig": + return LLMConfig( + api_key=os.getenv("OPENAI_API_KEY"), + base_url=os.getenv("OPENAI_BASE_URL"), + ) + + +def create_llm_client(cfg: LLMConfig | None = None) -> OpenAI: + cfg = cfg or LLMConfig.load_from_env() + + return OpenAI( + api_key=cfg.api_key, + base_url=cfg.base_url, + timeout=cfg.timeout, + max_retries=cfg.max_retries, + ) + + +client = create_llm_client() + + +def call_llm( + model: str, + messages: list[ChatCompletionMessageParam], + tools: Iterable[ChatCompletionToolUnionParam] | None = None, + options: ChatCompletionOptions | None = None, +) -> ChatCompletion: + """Call the LLM with the given parameters. + + Args: + model: The model to use. + messages: List of messages. + tools: Optional tools. + options: Optional ChatCompletion options like max_tokens, temperature, etc. + """ + + def _call(): + kwargs = options or {} + return client.chat.completions.create( + model=model, messages=messages, tools=tools or [], **kwargs + ) + + return retry_loop(_call) diff --git a/src/scouter/llmcore/exceptions.py b/src/scouter/llmcore/exceptions.py new file mode 100644 index 0000000..417742b --- /dev/null +++ b/src/scouter/llmcore/exceptions.py @@ -0,0 +1,10 @@ +class LLMError(Exception): + """Base exception for LLM related errors.""" + + +class ToolExecutionError(LLMError): + """Raised when a tool fails to execute.""" + + +class AgentError(LLMError): + """Raised when agent operations fail.""" diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py new file mode 100644 index 0000000..f592fed --- /dev/null +++ b/src/scouter/llmcore/tools.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +import asyncio +import inspect +import json +from collections.abc import Callable # noqa: TC003 +from typing import TYPE_CHECKING, Any, get_origin + +from pydantic import BaseModel, Field + +from .exceptions import ToolExecutionError + +if TYPE_CHECKING: + from .types import ChatCompletionToolParam + + +class Tool(BaseModel): + name: str + description: str + handler: Callable[[BaseModel], BaseModel | str] + + # Auto-filled fields + parameters_schema: dict = Field(default_factory=dict) + output_schema: dict = Field(default_factory=dict) + description_with_output: str = "" + + # Internal: Store the actual class types for runtime conversion + input_type: type[BaseModel] | None = None + + def model_post_init(self, /, __context) -> None: + # 1. Extract input model from handler signature + sig = inspect.signature(self.handler) + if not sig.parameters: + msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)." + raise TypeError(msg) + + param = next(iter(sig.parameters.values())) + input_model = param.annotation + + origin = get_origin(input_model) or input_model + if not (isinstance(origin, type) and issubclass(origin, BaseModel)): + msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}" + raise TypeError(msg) + + self.input_type = origin # SAVE THIS for execute_tool + + # 2. 
Extract return type + return_type = sig.return_annotation + return_origin = get_origin(return_type) or return_type + if return_origin is str: + pass # Allow str + elif isinstance(return_origin, type) and issubclass(return_origin, BaseModel): + pass # Allow BaseModel + else: + msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str" + raise TypeError(msg) + + # 3. Auto-fill everything + self.parameters_schema = origin.model_json_schema() + if return_origin is str: + self.output_schema = {"type": "string"} + else: + self.output_schema = return_origin.model_json_schema() # type: ignore[reportAttributeAccessIssue] + + # 4. Enrich description with pretty-printed output schema + if return_origin is str: + self.description_with_output = ( + f"{self.description}\n\nThe tool will **always return a string**." + ) + else: + pretty_output = json.dumps(self.output_schema, indent=2) + self.description_with_output = ( + f"{self.description}\n\n" + f"The tool will **always return JSON matching this exact schema**:\n" + f"```json\n{pretty_output}\n```" + ) + + def openai_tool_spec(self) -> ChatCompletionToolParam: + return { + "type": "function", + "function": { + "name": self.name, + "description": self.description_with_output, + "parameters": self.parameters_schema, + }, + } + + +def create_tool( + name: str, description: str, handler: Callable[[BaseModel], BaseModel | str] +) -> Tool: + """ + Creates a Pydantic Tool instance. + """ + return Tool(name=name, description=description, handler=handler) + + +def tool(name: str | None = None, description: str | None = None): + """ + Decorator to create and register a Pydantic-based tool. + The decorated function MUST take a Pydantic model and return a Pydantic model or a string. + """ + + def decorator(func: Callable[[BaseModel], BaseModel | str]): + tool_name = name or func.__name__ + tool_desc = description or (func.__doc__ or "No description.").strip() + + # Create the Tool instance + t = create_tool(tool_name, tool_desc, func) + + # Register it + register_tool(t) + + return func + + return decorator + + +def run_tool(name: str, raw_args: dict[str, Any]) -> str: + """ + Looks up a tool by name and executes it. + """ + tool_instance = lookup_tool(name) + return execute_tool(tool_instance, raw_args) + + +def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str: + """ + Executes a Pydantic Tool. + 1. Converts raw_args (dict) -> InputModel (Pydantic). + 2. Calls handler(InputModel). + 3. Gets OutputModel or str. + 4. Returns OutputModel.model_dump_json() or the str. + """ + try: + # 1. Instantiate the specific input model + input_model_cls = tool_instance.input_type + assert input_model_cls is not None + input_obj = input_model_cls(**raw_args) + + # 2. Call Handler + handler = tool_instance.handler + + if inspect.iscoroutinefunction(handler): + result_model = asyncio.run(handler(input_obj)) + else: + result_model = handler(input_obj) + + # 3. Validate Return + if not isinstance(result_model, (BaseModel, str)): + msg = f"Tool '{tool_instance.name}' handler did not return a Pydantic model or str." + raise ToolExecutionError(msg) # noqa: TRY301 + + # 4. 
Serialize Output + if isinstance(result_model, str): + return result_model + return result_model.model_dump_json() + + except Exception as e: + msg = f"Error executing tool '{tool_instance.name}': {e!s}" + raise ToolExecutionError(msg) from e + + +# Global registry stores Tool instances +TOOL_REGISTRY: dict[str, Tool] = {} + + +def register_tool(tool_instance: Tool) -> None: + """ + Registers a Tool object in the global registry. + """ + if not tool_instance.name: + msg = "Cannot register tool without a name." + raise ToolExecutionError(msg) + TOOL_REGISTRY[tool_instance.name] = tool_instance + + +def lookup_tool(name: str) -> Tool: + """ + Retrieves a Tool object from the global registry. + """ + if name not in TOOL_REGISTRY: + msg = f"Tool '{name}' not found in registry." + raise ToolExecutionError(msg) + return TOOL_REGISTRY[name] diff --git a/src/scouter/llmcore/types.py b/src/scouter/llmcore/types.py new file mode 100644 index 0000000..6b22afb --- /dev/null +++ b/src/scouter/llmcore/types.py @@ -0,0 +1,26 @@ +# Re-export OpenAI types +from openai.types.chat import ( + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionToolParam, + ChatCompletionUserMessageParam, +) +from openai.types.chat.chat_completion_message_tool_call import ( + ChatCompletionMessageToolCall, +) + +__all__ = [ + "ChatCompletion", + "ChatCompletionAssistantMessageParam", + "ChatCompletionMessage", + "ChatCompletionMessageParam", + "ChatCompletionMessageToolCall", + "ChatCompletionSystemMessageParam", + "ChatCompletionToolMessageParam", + "ChatCompletionToolParam", + "ChatCompletionUserMessageParam", +] diff --git a/src/scouter/llmcore/utils.py b/src/scouter/llmcore/utils.py new file mode 100644 index 0000000..8d53808 --- /dev/null +++ b/src/scouter/llmcore/utils.py @@ -0,0 +1,24 @@ +import random +import time + +from openai import APIError, RateLimitError + +ERROR_MAX_RETRY = "max retries exceeded" + + +def retry_loop(func, max_retries=5, base_delay=1): + last_exception: BaseException | None = None + + for attempt in range(max_retries): + try: + return func() + except (RateLimitError, APIError) as e: # noqa: PERF203 + last_exception = e + if attempt == max_retries - 1: + break + + sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 + time.sleep(sleep_time) + + # If we reach here, all retries failed + raise last_exception or RuntimeError(ERROR_MAX_RETRY) diff --git a/src/scouter_app/shared/__init__.py b/src/scouter/shared/__init__.py similarity index 100% rename from src/scouter_app/shared/__init__.py rename to src/scouter/shared/__init__.py diff --git a/src/scouter_app/shared/domain_models.py b/src/scouter/shared/domain_models.py similarity index 100% rename from src/scouter_app/shared/domain_models.py rename to src/scouter/shared/domain_models.py
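
---

A quick orientation for reviewers of the new `llmcore` package. The registry in `src/scouter/llmcore/tools.py` derives the OpenAI parameter schema from the handler's Pydantic signature, so a tool is just a decorated function. A minimal sketch of the intended usage; the `EchoParams` model and `echo` handler here are hypothetical, not part of this diff:

```python
from pydantic import BaseModel

from scouter.llmcore import run_tool, tool


class EchoParams(BaseModel):
    """Hypothetical input model; the handler's first parameter must be a BaseModel."""

    text: str


@tool(name="echo", description="Uppercase the given text.")
def echo(params: EchoParams) -> str:
    # Handlers must return a Pydantic model or a str.
    return params.text.upper()


# run_tool validates the raw dict into EchoParams, invokes the handler,
# and returns str output as-is (BaseModel output becomes model_dump_json()).
print(run_tool("echo", {"text": "hello"}))  # -> "HELLO"
```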
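Driving the loop in `src/scouter/llmcore/agent.py` then looks roughly like the following. This sketch assumes `OPENAI_API_KEY` is set, since `client.py` builds its module-level client from the environment at import time; the prompt text is made up:

```python
from scouter.llmcore import AgentRun, run_agent
from scouter.llmcore.agent import InputStep, default_continue_condition_factory

# Cap the run at 5 LLM calls; the default condition also stops as soon as
# the last non-input step is a plain assistant reply rather than a ToolStep.
run = AgentRun(continue_condition=default_continue_condition_factory(max_steps=5))
run.add_step(InputStep(message={"role": "user", "content": "What is Scouter?"}))

run_agent(run, model="gpt-4o-mini")

print(run.last_output)  # final assistant text (or last tool output)
print(run.total_usage)  # token counts summed across all LLMSteps
```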
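The two halves compose through `Tool.openai_tool_spec()`, which renders a registry entry into the OpenAI `tools` payload; `run_agent` then records each execution as a `ToolStep`. A sketch reusing the hypothetical `echo` tool from above:

```python
from scouter.llmcore import lookup_tool
from scouter.llmcore.agent import AgentRun, InputStep, run_agent

specs = [lookup_tool("echo").openai_tool_spec()]

run = AgentRun()  # default condition: run until the model stops calling tools
run.add_step(InputStep(message={"role": "user", "content": "Echo 'hi' for me."}))
run_agent(run, model="gpt-4o-mini", tools=specs)

for step in run.tool_executions:
    print(step.tool_name, step.args, step.success, step.output)
```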
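For non-agentic calls, `call_llm` is the lower-level entry point: it funnels every request through `retry_loop`, which retries only `RateLimitError`/`APIError` with exponential backoff plus jitter. A minimal sketch, again assuming environment-based credentials:

```python
from scouter.llmcore import ChatCompletionOptions, call_llm

opts: ChatCompletionOptions = {"temperature": 0.2, "max_tokens": 128}
completion = call_llm(
    "gpt-4o-mini",
    [{"role": "user", "content": "Summarize GraphRAG in one sentence."}],
    options=opts,
)
print(completion.choices[0].message.content)
```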