In [38]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
from langchain_core.messages import (
    SystemMessage,
    HumanMessage,
    AIMessage,
    ToolMessage
)
import operator

from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI
import asyncio

# Load variables from a .env file (if one is found) into the process environment
# so the os.getenv calls below can see them.
load_dotenv()

# Connection settings for the ChatOpenAI client built in Controller.__init__.
# os.getenv returns None for any variable that is not set; nothing here validates them.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LLM_URL = os.getenv("LLM_URL")
LLM_MODEL = os.getenv("LLM_MODEL")

In [8]:
# System prompt placed first in every message list sent to the LLM
# (wrapped in a SystemMessage by Controller.llm_node).
# NOTE(review): the prompt currently contains only whitespace — fill it in
# before relying on the model's behavior.
SYSTEM_PROMPT = '''
    
'''

In [19]:
import os
import asyncio
import httpx
from datetime import datetime
from typing import List, Optional, Union
from pydantic import BaseModel

class SearchResult(BaseModel):
    # One search hit, built by SearxNGSearch._search_async from a single
    # entry of the "results" list in the SearxNG JSON response.
    url: str  # required; taken from the item's "url" key
    title: Optional[str] = None  # item's "title" key
    snippet: Optional[str] = None  # item's "content" key; may be None
    engine: Optional[str] = None  # item's "engine" key
    score: Optional[float] = None  # item's "score" key; used as the sort key when results are capped
    extracted_at: str  # datetime.now().isoformat() at the moment the item was collected
class SearchError(BaseModel):
    # Returned by SearxNGSearch._search_async instead of results when a page
    # request fails.
    query: str  # the query string that was being searched
    error: str  # failure text produced by _fetch_page ("Page N failed: ...")
    extracted_at: str  # datetime.now().isoformat() at the moment the error was reported
    success: bool = False  # defaults to False; never set to another value in this file

class SearxNGSearch:
    """Small client for the JSON search endpoint of a SearxNG instance.

    Pages ``1..max_pages`` of a query are requested concurrently and merged
    into a list of ``SearchResult`` objects. Both an async entry point
    (``asearch``) and a blocking one (``search``) are provided.
    """

    def __init__(self, base_url: Optional[str] = None):
        """Resolve the endpoint URL.

        Args:
            base_url: Full URL of the search endpoint. When falsy, the
                ``SEARXNG_BASE_URL`` environment variable is used, falling
                back to ``http://localhost:8888/search``.

        Raises:
            ValueError: If the resolved URL is empty (e.g. the environment
                variable is set to an empty string).
        """
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL", "http://localhost:8888/search")
        if not self.base_url:
            raise ValueError("SEARXNG_BASE_URL is not set")

    async def _fetch_page(
        self,
        client: httpx.AsyncClient,
        query: str,
        page: int,
        timeout: int,
    ) -> Union[List[dict], str]:
        """Fetch one result page.

        Returns:
            The ``"results"`` list of the JSON response (``[]`` when the key
            is absent), or a ``"Page N failed: ..."`` string when anything
            goes wrong. Errors are returned rather than raised so that
            ``asyncio.gather`` in ``_search_async`` always completes.
        """
        try:
            params = {
                "q": query,
                "format": "json",
                "pageno": page,
            }
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                "Accept": "application/json",
            }
            r = await client.get(self.base_url, params=params, timeout=timeout, headers=headers)
            r.raise_for_status()
            return r.json().get("results", [])
        except Exception as e:
            # Deliberately broad: any network, HTTP-status or JSON problem is
            # reported to the caller as an error string for this page.
            return f"Page {page} failed: {e}"

    async def _search_async(
        self,
        query: str,
        max_pages: int,
        timeout: int,
        max_results: Optional[int],
        output_for_llm: bool,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Run the search and post-process the merged results.

        Args:
            query: Search string.
            max_pages: Number of pages to request (pages ``1..max_pages``).
            timeout: Per-request timeout passed to httpx.
            max_results: When truthy, results are sorted by score (missing
                scores count as 0, highest first) and truncated to this many.
                When ``None`` or 0 the results are returned unsorted and
                uncapped.
            output_for_llm: When true, return one JSON document per line
                instead of a list of models.

        Returns:
            A list of ``SearchResult``, a newline-joined JSON string, or a
            ``SearchError`` if any page failed (the first failing page in
            page order is reported and all results are discarded).
        """
        collected: List[SearchResult] = []

        async with httpx.AsyncClient() as client:
            pages = await asyncio.gather(
                *(
                    self._fetch_page(client, query, page, timeout)
                    for page in range(1, max_pages + 1)
                )
            )

        for page in pages:
            if isinstance(page, str):
                # _fetch_page signals failure with a string.
                return SearchError(
                    query=query,
                    error=page,
                    extracted_at=datetime.now().isoformat(),
                )

            for item in page:
                url = item.get("url")
                if not url:
                    # SearchResult.url is a required str; an entry without a
                    # URL would raise a validation error and abort the whole
                    # search, so it is skipped instead.
                    continue
                collected.append(
                    SearchResult(
                        url=url,
                        title=item.get("title"),
                        snippet=item.get("content"),  # SearxNG snippet
                        engine=item.get("engine"),
                        score=item.get("score"),
                        extracted_at=datetime.now().isoformat(),
                    )
                )

        if max_results:
            collected.sort(
                key=lambda x: x.score if x.score is not None else 0,
                reverse=True,
            )
            collected = collected[:max_results]

        if output_for_llm:
            return "\n".join(r.model_dump_json() for r in collected)

        return collected

    async def asearch(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Async entry point; see ``_search_async`` for parameters and return value."""
        return await self._search_async(
            query=query,
            max_pages=max_pages,
            timeout=timeout,
            max_results=max_results,
            output_for_llm=output_for_llm,
        )

    def search(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Blocking entry point; see ``_search_async`` for parameters and return value.

        Works both from plain scripts and from a thread that already has a
        running event loop (such as a Jupyter kernel). In the latter case the
        search runs on a worker thread with its own loop, and the calling
        thread blocks until it finishes; prefer ``await asearch(...)`` there.
        """
        from concurrent.futures import ThreadPoolExecutor

        def _run() -> Union[List[SearchResult], str, SearchError]:
            # The coroutine is created inside the thread that runs it.
            return asyncio.run(
                self._search_async(
                    query=query,
                    max_pages=max_pages,
                    timeout=timeout,
                    max_results=max_results,
                    output_for_llm=output_for_llm,
                )
            )

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so asyncio.run is safe here.
            return _run()

        # asyncio.run refuses to start while another loop is running in the
        # same thread, so hand the work to a short-lived worker thread.
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(_run).result()


In [53]:
class AgentState(TypedDict):
    """State dictionary passed between the graph nodes."""

    # Annotated with operator.add so that lists returned by nodes under
    # "messages" are concatenated onto the existing list.
    messages: Annotated[list, operator.add]
    # Company name used to build the search query and the filter prompt.
    company: str
    # Indicator/topic appended to the company name in the search query.
    indicator: str
    # Hits stored by the search node; None until a search has succeeded.
    # TypedDict fields cannot carry default values, so callers must supply
    # this key explicitly (Controller.run passes None).
    search_results: Optional[List[SearchResult]]

class Controller:
    """Two-step LangGraph pipeline: web search, then an LLM pass over the hits.

    The compiled graph runs ``search_node`` first and ``llm_node`` second,
    then ends.
    """

    def __init__(self):
        """Create the LLM client, the searcher, and the compiled graph."""
        self.llm = ChatOpenAI(
            base_url=LLM_URL,
            api_key=OPENAI_API_KEY,
            model=LLM_MODEL,
            temperature=0.2
        )

        # Create the searcher before compiling the graph so that every
        # attribute the nodes read already exists once ``self.app`` does.
        self.searcher = SearxNGSearch()
        self.app = self.build_graph()

    async def llm_node(self, state: AgentState):
        """Call the LLM and return its reply as a new entry for ``messages``.

        When the state holds search results, the model receives the system
        prompt plus a Japanese prompt listing the hits and asking which links
        really relate to the company (with reasons); the existing messages
        are not sent in that case. Otherwise the system prompt is followed by
        the accumulated messages, with plain strings wrapped as HumanMessage.

        Returns:
            ``{"messages": [<reply text>]}``.
        """
        messages = [SystemMessage(content=SYSTEM_PROMPT)]

        company = state["company"]
        search_results = state.get("search_results")

        # If we have search results, ask LLM to filter relevant ones
        if search_results:
            entries = []
            for i, r in enumerate(search_results):
                # snippet is Optional on SearchResult; slicing None would
                # raise TypeError, so a missing snippet becomes "".
                snippet = (r.snippet or "")[:200]
                entries.append(
                    f"{i+1}. Title: {r.title}\n   URL: {r.url}\n   Snippet: {snippet}"
                )
            links_text = "\n".join(entries)

            filter_prompt = f"""
            以下は、会社「{company}」に関する検索結果です。
            どのリンクが実際に「{company}」に関連しているかを確認してください。
            関連があるリンクのみを返してください。

            検索結果:
            {links_text}

            関連があるリンクを識別して、理由と共に列挙してください。
            """
            messages.append(HumanMessage(content=filter_prompt))
        else:
            # Nodes in this class store plain strings in "messages" (error
            # text, LLM replies); wrap those, pass message objects through.
            for msg in state["messages"]:
                if isinstance(msg, str):
                    messages.append(HumanMessage(content=msg))
                else:
                    messages.append(msg)

        print("Sending messages to LLM:")
        print(f"Prompt length: {len(str(messages))}")

        response = await self.llm.ainvoke(messages)
        return {
            "messages": [response.content],
        }

    async def search_node(self, state: AgentState, max_pages=3, max_results=5):
        """Search for "<company>の<indicator>" and store the hits in the state.

        Args:
            state: Current graph state; ``company`` and ``indicator`` are read.
            max_pages: Number of result pages to request.
            max_results: Cap on the number of hits kept.

        Returns:
            On success, ``{"messages": [], "search_results": <hits>}``.
            On a search error, ``{"messages": ["Search error: ..."]}`` with
            ``search_results`` left untouched.
        """
        company = state["company"]
        indicator = state["indicator"]

        query = f"{company}の{indicator}"
        print(f"Searching for: {query}")

        results = await self.searcher.asearch(
            query=query,
            max_pages=max_pages,
            max_results=max_results,
            output_for_llm=False,
        )

        # Handle search errors
        if isinstance(results, SearchError):
            error_msg = f"Search error: {results.error}"
            print(error_msg)
            return {
                "messages": [error_msg],
            }

        # Extract and print URLs
        if isinstance(results, list):
            print(f"\nFound {len(results)} results:")
            for i, r in enumerate(results, 1):
                print(f"  {i}. {r.title}")
                print(f"     URL: {r.url}")

        return {
            "messages": [],
            "search_results": results,
        }

    def build_graph(self):
        """Build and compile the graph: searcher -> llm_node -> END."""
        graph = StateGraph(AgentState)

        graph.add_node("searcher", self.search_node)
        graph.add_node("llm_node", self.llm_node)

        graph.set_entry_point("searcher")
        graph.add_edge("searcher", "llm_node")
        graph.add_edge("llm_node", END)

        app = graph.compile()

        return app

    async def run(self, user_input: str, company: str, indicator: str):
        """Run the graph once and return the final state.

        Args:
            user_input: Optional user text; when non-empty it becomes the
                first HumanMessage in ``messages``.
            company: Company name to search for.
            indicator: Topic appended to the company name in the query.

        Returns:
            The value returned by the compiled graph's ``ainvoke``.
        """
        initial_state: AgentState = {
            "messages": [HumanMessage(content=user_input)] if user_input else [],
            "company": company,
            "indicator": indicator,
            "search_results": None,
        }

        result = await self.app.ainvoke(initial_state)

        return result


In [54]:
# Build the pipeline (LLM client, searcher, compiled graph).
controller = Controller()

# Inputs for this run. An empty user_input means the initial message list is empty.
user_input = ""
company = "鹿島建設"
indicator = "株主優待の有無・内容"

# Top-level await: relies on the notebook's already-running event loop.
result = await controller.run(
    user_input = user_input,
    company=company,
    indicator=indicator
)

# Prints the whole final state returned by the graph.
print(result)


Searching for: 鹿島建設の株主優待の有無・内容

Found 5 results:
  1. よくあるご質問（FAQ） | 鹿島建設株式会社
     URL: https://www.kajima.co.jp/ir/faq/?mode=pc
  2. 株主優待 - 鹿島【1812】 - Yahoo!ファイナンス
     URL: https://finance.yahoo.co.jp/quote/1812.O/incentive
  3. 会社概要｜京王プラザホテル（新宿）【公式】
     URL: https://www.keioplaza.co.jp/corporate/corporate/
  4. 鹿島建設 (1812) の株主優待・優待利回り・配当利回り
     URL: https://moneyworld.jp/stock/1812/yutai
  5. 【表紙】 - 株式会社ナカボーテック
     URL: https://www.nakabohtec.co.jp/wp-content/uploads/2025/03/yukashoken75.pdf
Sending messages to LLM:
Prompt length: 1242

Found 5 results:
  1. よくあるご質問（FAQ） | 鹿島建設株式会社
     URL: https://www.kajima.co.jp/ir/faq/?mode=pc
  2. 株主優待 - 鹿島【1812】 - Yahoo!ファイナンス
     URL: https://finance.yahoo.co.jp/quote/1812.O/incentive
  3. 会社概要｜京王プラザホテル（新宿）【公式】
     URL: https://www.keioplaza.co.jp/corporate/corporate/
  4. 鹿島建設 (1812) の株主優待・優待利回り・配当利回り
     URL: https://moneyworld.jp/stock/1812/yutai
  5. 【表紙】 - 株式会社ナカボーテック
     URL: https://www.nakabohtec.co.jp/wp-content/uploads/2025/