In [21]:
# src/agents/seo_agent.py
from langchain.agents import create_agent
from langchain.agents.middleware import wrap_tool_call
from langchain_openai import ChatOpenAI
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_core.messages import ToolMessage
from pathlib import Path
from typing import Dict, Any, Optional
import os
import sys
import traceback

# ---------- Default SEO agent system prompt ----------

# System prompt SEOAgent falls back to when the caller passes no prompt of
# its own. The text is sent to the model verbatim, so wording changes here
# change agent behavior. The trailing .strip() drops the leading and trailing
# newlines that the triple-quoted layout introduces.
DEFAULT_SEO_SYSTEM_PROMPT = """
You are an expert, data-driven SEO analyst powered by MCP tools:
- Google Search Console (GSC) tools
- DataForSEO tools (if available)

GOAL
- Focus only on the user's query.
- Use tools to fetch data first, then synthesize insights.
- Output should be short, structured, and actionable.

STYLE
- Be concise and clear.
- Prioritize what matters most for SEO impact.
- Prefer bullet points and mini-sections:
  - Quick summary
  - Key findings
  - Recommended actions

TOOL USAGE
- Call only the tools you need for the specific query.
- Prefer 1–4 tool calls that give good coverage.
- If DataForSEO is unavailable (tools missing), rely on GSC + reasoning.
- If tools fail, still give a best-effort answer using whatever data you have.

NEVER
- Never expose raw JSON tool responses directly.
- Never leak internal errors or stack traces; summarize failures briefly.
""".strip()


# ---------- Middleware for tool error handling ----------

@wrap_tool_call
async def handle_tool_errors(request, handler):
    """
    Run a tool call and turn any failure into an error ToolMessage.

    Registered as agent middleware through the @wrap_tool_call decorator.
    On success the handler's result is returned untouched. On failure the
    exception and a short traceback are printed server-side, and the model
    receives a brief ToolMessage describing the failure so it can still
    produce an answer instead of the whole run crashing.

    Args:
        request: Tool call request; only its ``tool_call`` mapping
            ("name" and "id" keys) is read here.
        handler: Awaitable callable that actually executes the tool.

    Returns:
        The handler's result, or a ToolMessage with ``status="error"``.
    """
    tool_call = request.tool_call or {}
    # `or` also covers a present-but-empty/None name, not just a missing key.
    tool_name = tool_call.get("name") or "unknown_tool"

    try:
        return await handler(request)
    except Exception as exc:
        # NOTE(review): this also intercepts any exception a tool raises on
        # purpose for control flow (if the framework uses such) — confirm.
        # Log server-side, but only send a clean message back through the LLM
        tb = traceback.format_exc(limit=3)
        print(f"[SEOAgent] Tool error in {tool_name}:", exc)
        print(tb)

        # Exception text is capped at 200 chars to keep the message compact.
        error_msg = (
            f"Tool error in {tool_name}: {type(exc).__name__}: {str(exc)[:200]}. "
            "Please use available information to still provide a useful SEO answer."
        )

        return ToolMessage(
            content=error_msg,
            name=tool_name,
            # ToolMessage requires a string id; passing None would raise a
            # validation error from inside this handler and mask the real
            # tool failure, so fall back to an empty string.
            tool_call_id=tool_call.get("id") or "",
            # Mark the message as a failure rather than a normal tool result.
            status="error",
        )


# ---------- SEOAgent implementation ----------

class SEOAgent:
    """
    SEOAgent wraps a LangChain MCP client + chat model into a tool-calling agent.

    - Uses the GSC MCP server (local Python script started over stdio)
    - Optionally uses the DataForSEO MCP server (streamable HTTP endpoint)
    - Exposes: get_tools(), get_agent(), run(), stream()

    Tools and the agent are created lazily on first use and cached on the
    instance. Progress is reported with print() statements.
    """

    def __init__(
        self,
        llm: "ChatOpenAI",
        system_prompt: Optional[str] = None,
    ):
        """
        Args:
            llm: Chat model the agent runs on.
            system_prompt: Prompt override; DEFAULT_SEO_SYSTEM_PROMPT is used
                when this is None or empty.
        """
        self.name = "SEO Agent"
        self.description: str = system_prompt or DEFAULT_SEO_SYSTEM_PROMPT
        self.model = llm

        # Lazily populated caches; see get_tools() / get_agent().
        self._tools = None
        self._agent = None
        self._tool_warning: Optional[str] = None
        self._mcp_client: Optional["MultiServerMCPClient"] = None

    # ------------------------------------------------------------------
    # MCP server config
    # ------------------------------------------------------------------

    def _build_gsc_server_config(self) -> Dict[str, Any]:
        """Build the stdio configuration for the local GSC MCP server."""
        # Handle both notebook and script execution
        try:
            # __file__ exists for script execution: project root is two
            # levels above this file's directory.
            current_dir = Path(__file__).parent
            project_root = current_dir.parent.parent
        except NameError:
            # Notebook execution has no __file__; assume the notebook is run
            # from the project root.
            project_root = Path.cwd()

        gsc_server_path = project_root / "src" / "tools" / "gsc_server.py"

        # Launch the server with the interpreter running this process.
        python_interpreter = sys.executable

        return {
            "command": python_interpreter,
            "args": [str(gsc_server_path)],
            "transport": "stdio",
            "env": {
                "GSC_CREDENTIALS": os.getenv("GSC_CREDENTIALS", ""),
                "GSC_SKIP_OAUTH": os.getenv("GSC_SKIP_OAUTH", "true"),
            },
        }

    def _build_dataforseo_config(self) -> Optional[Dict[str, Any]]:
        """
        Build the configuration for the optional DataForSEO MCP server.

        Environment variables read:
            ENABLE_DATAFORSEO_MCP: "1"/"true"/"yes" (case-insensitive) enables
                the server; defaults to "true".
            DATAFORSEO_MCP_URL: endpoint URL; a built-in default is used when
                unset, and a blank value disables the server.
            DATAFORSEO_MCP_TIMEOUT: HTTP timeout in seconds (positive number).
            DATAFORSEO_MCP_AUTH_HEADER: value sent as the Authorization header.

        Returns:
            The connection dict, or None when the server is disabled.
        """
        enable_remote = os.getenv("ENABLE_DATAFORSEO_MCP", "true").lower() in ("1", "true", "yes")
        if not enable_remote:
            return None

        default_url = "https://dataforseo-mcp-worker.hitesh-solanki.workers.dev/mcp"
        dataforseo_url = os.getenv("DATAFORSEO_MCP_URL", default_url).strip()
        if not dataforseo_url:
            return None

        config: Dict[str, Any] = {
            "transport": "streamable_http",
            "url": dataforseo_url,
        }

        timeout = os.getenv("DATAFORSEO_MCP_TIMEOUT")
        if timeout:
            from datetime import timedelta

            try:
                seconds = float(timeout)
                # `not seconds > 0` also rejects NaN, which compares False.
                if not seconds > 0:
                    raise ValueError("timeout must be a positive number of seconds")
                # The adapter's streamable_http connection schema documents
                # `timeout` as a timedelta, so convert rather than pass a
                # bare float. Infinite/huge values raise OverflowError here.
                config["timeout"] = timedelta(seconds=seconds)
            except (ValueError, OverflowError) as exc:
                # Keep going with the library default, but say so instead of
                # silently discarding the misconfiguration.
                print(f"[SEOAgent] Ignoring invalid DATAFORSEO_MCP_TIMEOUT={timeout!r}: {exc}")

        auth_header = os.getenv("DATAFORSEO_MCP_AUTH_HEADER")
        if auth_header:
            config["headers"] = {"Authorization": auth_header.strip()}

        return config

    def _build_server_config(self, include_dataforseo: bool = True) -> Dict[str, Any]:
        """
        Build the MCP server configuration dictionary.

        The GSC server is always present under "gscServer"; "dataforseo" is
        added only when requested and enabled via the environment.
        """
        servers: Dict[str, Any] = {
            "gscServer": self._build_gsc_server_config()
        }

        if include_dataforseo:
            dataforseo_config = self._build_dataforseo_config()
            if dataforseo_config:
                servers["dataforseo"] = dataforseo_config

        return servers

    def get_mcp_client(self, include_dataforseo: bool = True) -> "MultiServerMCPClient":
        """Create and return a new MCP client for the configured servers."""
        config = self._build_server_config(include_dataforseo=include_dataforseo)
        return MultiServerMCPClient(config)

    # ------------------------------------------------------------------
    # Tools & agent
    # ------------------------------------------------------------------

    async def get_tools(self):
        """
        Load tools from the MCP servers, caching them for reuse.

        First tries GSC + DataForSEO together. If that fails and DataForSEO
        was part of the configuration, retries with GSC only and records a
        warning retrievable through get_tool_warning().

        The cached client, tools and warning are only assigned after a load
        actually succeeds, so a failed attempt leaves no partial state.

        Raises:
            Exception: the original error when no fallback applies, or the
                fallback error (chained to the original) when both fail.
        """
        if self._tools is not None:
            print(f"[SEOAgent] Using cached tools ({len(self._tools)} tools)")
            return self._tools

        print("[SEOAgent] Loading tools from MCP servers...")
        self._tool_warning = None

        # Try loading with both servers first
        client = self.get_mcp_client(include_dataforseo=True)
        try:
            print("[SEOAgent] Attempting to connect to GSC + DataForSEO servers...")
            tools = await client.get_tools()
        except Exception as exc:
            print(f"[SEOAgent] Failed to load with both servers: {type(exc).__name__}: {str(exc)[:200]}")
            if "dataforseo" not in self._build_server_config(include_dataforseo=True):
                # GSC was the only configured server: nothing to fall back to.
                raise

            print("[SEOAgent] Falling back to GSC-only configuration...")
            fallback_client = self.get_mcp_client(include_dataforseo=False)
            try:
                tools = await fallback_client.get_tools()
            except Exception as fallback_exc:
                print(f"[SEOAgent] Fallback also failed: {type(fallback_exc).__name__}: {str(fallback_exc)[:200]}")
                raise fallback_exc from exc

            # GSC alone works, so the first failure is attributed to
            # DataForSEO; only now is it accurate to surface the warning.
            details = str(exc)
            if len(details) > 500:
                details = details[:500] + "... (truncated)"
            self._tool_warning = (
                "⚠️ DataForSEO MCP server could not be reached. "
                "Continuing with Google Search Console tools only.\n"
                f"Details: {details}"
            )
            self._mcp_client = fallback_client
            self._tools = tools
            print(f"[SEOAgent] Successfully loaded {len(self._tools)} tools (GSC only)")
            return self._tools

        self._mcp_client = client
        self._tools = tools
        print(f"[SEOAgent] Successfully loaded {len(self._tools)} tools")
        return self._tools

    def get_tool_warning(self) -> Optional[str]:
        """Return any warning generated while loading tools, else None."""
        return self._tool_warning

    def update_system_prompt(self, new_prompt: str):
        """Update the system prompt and invalidate the cached agent."""
        self.description = new_prompt
        self._agent = None  # recreate agent with new prompt on next call

    async def get_agent(self):
        """Get or create the agent with tools and error handling middleware."""
        if self._agent is None:
            print("[SEOAgent] Creating new agent instance...")
            tools = await self.get_tools()

            self._agent = create_agent(
                model=self.model,
                tools=tools,
                system_prompt=self.description,
                middleware=[handle_tool_errors],
            )
            print("[SEOAgent] Agent created successfully")
        else:
            print("[SEOAgent] Using cached agent instance")

        return self._agent

    # ------------------------------------------------------------------
    # Public run / stream APIs
    # ------------------------------------------------------------------

    async def run(self, messages):
        """
        Run the agent with the given messages and return its result.

        messages: list of dicts or LangChain messages; typically:
          [
            {"role": "system", "content": "..."} (optional)
            {"role": "user", "content": "Provide SEO analysis for strique.io"}
          ]
        """
        print(f"[SEOAgent] Running agent with {len(messages)} message(s)")
        agent = await self.get_agent()
        print("[SEOAgent] Invoking agent...")
        result = await agent.ainvoke({"messages": messages})
        print(f"[SEOAgent] Agent invocation complete, result type: {type(result)}")
        return result

    async def stream(self, messages):
        """
        Stream agent responses as they are generated.

        Yields each chunk produced by the agent's astream() call, which is
        invoked with stream_mode="values".
        """
        agent = await self.get_agent()
        async for chunk in agent.astream({"messages": messages}, stream_mode="values"):
            yield chunk


In [22]:
# src/agents/router_agent.py
import json
from typing import Literal, TypedDict
from langchain_openai import ChatOpenAI

# The three routing decisions the router is allowed to produce.
RouteType = Literal["seo", "other", "mixed"]

# Dict shape returned by the router: the chosen route plus the model's
# self-reported confidence.
RouterOutput = TypedDict(
    "RouterOutput",
    {
        "route": RouteType,
        "confidence": float,
    },
)

# System prompt for the routing classifier. It asks the model for a bare JSON
# object with "route" and "confidence" keys, which RouterAgent.route() then
# parses with json.loads. The text is sent verbatim; .strip() only removes
# the newlines added by the triple-quoted layout.
ROUTER_SYSTEM_PROMPT = """
You are a simple routing classifier.

Your job:
- Decide if the user query is about SEO, something else, or both.

Valid routes:
- "seo"   -> Search Engine Optimization, keywords, traffic, ranking, SERP, GSC, DataForSEO, etc.
- "other" -> Any non-SEO query.
- "mixed" -> Contains both SEO and non-SEO aspects.

Return STRICT JSON only, no explanation:
{"route": "seo" | "other" | "mixed", "confidence": 0.0-1.0}
""".strip()


class RouterAgent:
    """
    Classify a user query as "seo", "other" or "mixed" with one LLM call.

    The model is asked (via ROUTER_SYSTEM_PROMPT) for a small JSON object.
    Parsing is defensive: any reply that cannot be understood yields the
    fallback ``{"route": "other", "confidence": 0.5}`` instead of raising.
    """

    # Only these values are accepted from the model; callers branch on them.
    _VALID_ROUTES = ("seo", "other", "mixed")

    # Confidence used when the reply has a valid route but no usable number.
    _DEFAULT_CONFIDENCE = 0.6

    def __init__(self, llm: "ChatOpenAI"):
        self.llm = llm

    @staticmethod
    def _fallback() -> "RouterOutput":
        """Return a fresh fallback result (new dict on every call)."""
        return {"route": "other", "confidence": 0.5}

    @staticmethod
    def _parse_response(content) -> "RouterOutput":
        """
        Turn raw model output into a validated RouterOutput.

        Args:
            content: Message content; a string, or a list of string / dict
                parts whose "text" values are concatenated.

        Returns:
            A dict with a route from _VALID_ROUTES and a confidence clamped
            to [0.0, 1.0], or the fallback when the reply is unusable.
        """
        if isinstance(content, list):
            content = "".join(
                part if isinstance(part, str) else str(part.get("text", ""))
                for part in content
                if isinstance(part, (str, dict))
            )
        if not isinstance(content, str):
            return RouterAgent._fallback()

        # Models often wrap JSON in code fences or add a sentence around it;
        # parse only the outermost {...} span.
        start, end = content.find("{"), content.rfind("}")
        if start == -1 or end < start:
            return RouterAgent._fallback()
        try:
            data = json.loads(content[start:end + 1])
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return RouterAgent._fallback()
        if not isinstance(data, dict):
            return RouterAgent._fallback()

        route = data.get("route", "other")
        if not isinstance(route, str):
            return RouterAgent._fallback()
        route = route.strip().lower()
        if route not in RouterAgent._VALID_ROUTES:
            # An unknown label must not leak to callers, which treat every
            # value other than "other" as an SEO request.
            return RouterAgent._fallback()

        # A malformed confidence should not discard an otherwise valid route.
        try:
            confidence = float(data.get("confidence", RouterAgent._DEFAULT_CONFIDENCE))
        except (TypeError, ValueError, OverflowError):
            confidence = RouterAgent._DEFAULT_CONFIDENCE
        if confidence != confidence:  # NaN is the only value unequal to itself
            confidence = RouterAgent._DEFAULT_CONFIDENCE
        confidence = min(1.0, max(0.0, confidence))

        return {"route": route, "confidence": confidence}

    async def route(self, user_query: str) -> "RouterOutput":
        """
        Ask the LLM to classify ``user_query`` and return the parsed result.

        Errors raised by the LLM call itself propagate to the caller; only
        the interpretation of the reply is best-effort.
        """
        messages = [
            {"role": "system", "content": ROUTER_SYSTEM_PROMPT},
            {"role": "user", "content": user_query},
        ]
        resp = await self.llm.ainvoke(messages)
        # A response object without `.content` is treated as unparseable.
        return self._parse_response(getattr(resp, "content", None))


In [23]:
# src/agents/orchestrator.py
import asyncio
import json
from typing import Any, Dict
from langchain_core.messages import BaseMessage

from langchain_openai import ChatOpenAI

# Optional: simple summarizer so SEOAgent can focus on data + insights.
# This prompt drives the final clean-up pass in Orchestrator._summarize(),
# which sends the original query together with the raw SEO agent output.
# The text is sent verbatim; .strip() only removes the surrounding newlines.
SUMMARIZER_SYSTEM_PROMPT = """
You are a summarization assistant.

Input:
- The original user query
- The raw assistant output from the SEO agent (may contain tool reasoning)

Your job:
- Clean it up.
- Keep it concise and actionable.
- Use small sections like:
  - Summary
  - Key insights
  - Recommended next steps

Output plain text, no JSON.
""".strip()


class Orchestrator:
    """
    Route a user query, run the SEO agent when relevant, and polish the result.

    Pipeline implemented by handle_query():
      1. RouterAgent classifies the query.
      2. Route "other" is answered directly by a general-purpose LLM call.
      3. Any other route runs SEOAgent, then a summarizer LLM cleans up the
         raw output; a tool-loading warning, if any, is prepended.

    Progress is reported with print() statements.
    """

    def __init__(self):
        # You can tune these (router can be mini, SEO full, etc.)
        self.router_llm = ChatOpenAI(model="gpt-4.1", temperature=0.0)
        self.summarizer_llm = ChatOpenAI(model="gpt-4.1", temperature=0.4)
        self.seo_llm = ChatOpenAI(model="gpt-5.1", temperature=0.2)

        self.router = RouterAgent(self.router_llm)
        self.seo_agent = SEOAgent(self.seo_llm)

    def _serialize_messages(self, messages) -> str:
        """
        Convert a list of messages into an indented JSON string.

        Dicts are passed through as-is; LangChain messages are reduced to
        their class name, content and (when set) name / tool_call_id; any
        other object is stringified.
        """
        serialized = []
        for msg in messages:
            if isinstance(msg, dict):
                serialized.append(msg)
            elif isinstance(msg, BaseMessage):
                # Convert LangChain message to dict
                msg_dict = {
                    "type": msg.__class__.__name__,
                    "content": msg.content if hasattr(msg, "content") else str(msg),
                }
                # Add additional fields only when they carry a value
                name = getattr(msg, "name", None)
                if name:
                    msg_dict["name"] = name
                tool_call_id = getattr(msg, "tool_call_id", None)
                if tool_call_id:
                    msg_dict["tool_call_id"] = tool_call_id
                serialized.append(msg_dict)
            else:
                serialized.append({"type": type(msg).__name__, "content": str(msg)})
        # default=str keeps this from raising TypeError when a message dict
        # or content block holds a non-JSON value, matching the fallback
        # serialization used for unrecognized agent results.
        return json.dumps(serialized, ensure_ascii=False, indent=2, default=str)

    def _extract_raw_answer(self, seo_result) -> str:
        """Flatten whatever the SEO agent returned into one string."""
        if not isinstance(seo_result, dict):
            print("[Orchestrator] Result is not a dict, converting to string")
            return str(seo_result)

        # Try common keys
        if "output" in seo_result:
            print("[Orchestrator] Found 'output' key in result")
            return str(seo_result["output"])
        if "messages" in seo_result:
            print(f"[Orchestrator] Found 'messages' key in result ({len(seo_result['messages'])} messages)")
            return self._serialize_messages(seo_result["messages"])

        print("[Orchestrator] No 'output' or 'messages' key, serializing entire result")
        # Best effort: even with default=str, json.dumps can still fail
        # (e.g. unsupported key types), so fall back to str().
        try:
            return json.dumps(seo_result, ensure_ascii=False, default=str)
        except Exception as e:
            print(f"[Orchestrator] JSON serialization failed: {e}, using str()")
            return str(seo_result)

    async def _summarize(self, user_query: str, raw_answer: str) -> str:
        """Ask the summarizer LLM to clean up ``raw_answer`` for the user."""
        print(f"[Orchestrator] Summarizing answer (length: {len(raw_answer)} chars)")
        messages = [
            {"role": "system", "content": SUMMARIZER_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"User query:\n{user_query}\n\nRaw SEO agent answer:\n{raw_answer}",
            },
        ]
        resp = await self.summarizer_llm.ainvoke(messages)
        return resp.content

    async def handle_query(self, user_query: str) -> str:
        """
        Answer ``user_query`` end to end and return the user-facing text.

        Exceptions from the router, the SEO agent or the LLM calls are not
        caught here and propagate to the caller.
        """
        print(f"[Orchestrator] Handling query: {user_query[:100]}...")

        # 1) ROUTER
        print("[Orchestrator] Step 1: Routing query...")
        route_result = await self.router.route(user_query)
        route = route_result["route"]
        print(f"[Orchestrator] Route determined: {route} (confidence: {route_result.get('confidence', 'N/A')})")

        # 2) If not SEO, you can wire a generic agent here; for now, we just answer via LLM directly
        if route == "other":
            print("[Orchestrator] Non-SEO route, using generic assistant...")
            resp = await self.summarizer_llm.ainvoke(
                [
                    {
                        "role": "system",
                        "content": "You are a helpful general assistant. Answer briefly.",
                    },
                    {"role": "user", "content": user_query},
                ]
            )
            return resp.content

        # 3) SEO route → call SEOAgent (this is where your MCP tools kick in).
        # Every route other than "other" (including "mixed") lands here.
        print("[Orchestrator] Step 2: Calling SEO agent...")
        messages = [
            # The SEOAgent already has its own system prompt; we only send user content here.
            {"role": "user", "content": user_query}
        ]
        seo_result = await self.seo_agent.run(messages)
        print(f"[Orchestrator] SEO agent returned result type: {type(seo_result)}")
        print(f"[Orchestrator] SEO result keys: {list(seo_result.keys()) if isinstance(seo_result, dict) else 'N/A (not a dict)'}")

        # `seo_result` shape depends on LangChain version; usually
        # seo_result["messages"] or seo_result["output"].
        raw_answer = self._extract_raw_answer(seo_result)

        print(f"[Orchestrator] Step 3: Summarizing raw answer ({len(raw_answer)} chars)...")

        # 4) Summarize / polish for final user-facing answer
        final_answer = await self._summarize(user_query, raw_answer)

        # Optionally prepend any tool warning (DataForSEO fallback)
        warning = self.seo_agent.get_tool_warning()
        if warning:
            print("[Orchestrator] Adding tool warning to final answer")
            final_answer = f"{warning}\n\n{final_answer}"

        print("[Orchestrator] Query handling complete!")
        return final_answer


# Quick CLI test
async def main():
    """Run one hard-coded sample query through the orchestrator and print the answer."""
    orchestrator = Orchestrator()
    final_answer = await orchestrator.handle_query(
        "Provide SEO analysis and growth strategy for domain strique.io"
    )
    # Banner and answer in one call; sep="\n" reproduces the line break that
    # two consecutive print() calls would put between them.
    print("\n========== FINAL ANSWER ==========\n", final_answer, sep="\n")



In [24]:
# Top-level await only works inside a notebook/REPL that already runs an
# event loop; from a plain script, use asyncio.run(main()) instead.
await main()

[Orchestrator] Handling query: Provide SEO analysis and growth strategy for domain strique.io...
[Orchestrator] Step 1: Routing query...
[Orchestrator] Route determined: seo (confidence: 1.0)
[Orchestrator] Step 2: Calling SEO agent...
[SEOAgent] Running agent with 1 message(s)
[SEOAgent] Creating new agent instance...
[SEOAgent] Loading tools from MCP servers...
[SEOAgent] Attempting to connect to GSC + DataForSEO servers...
[SEOAgent] Successfully loaded 86 tools
[SEOAgent] Agent created successfully
[SEOAgent] Invoking agent...
[SEOAgent] Agent invocation complete, result type: <class 'dict'>
[Orchestrator] SEO agent returned result type: <class 'dict'>
[Orchestrator] SEO result keys: ['messages']
[Orchestrator] Found 'messages' key in result (7 messages)
[Orchestrator] Step 3: Summarizing raw answer (10815 chars)...
[Orchestrator] Summarizing answer (length: 10815 chars)
[Orchestrator] Query handling complete!


Summary  
Due to temporary tool limitations, this analysis is based on