In [1]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
from langchain_core.messages import (
    SystemMessage,
    HumanMessage,
    AIMessage,
    ToolMessage
)
import operator

from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI
import asyncio

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LLM_URL = os.getenv("LLM_URL")
LLM_MODEL = os.getenv("LLM_MODEL")

In [3]:
import os
import asyncio
import httpx
from datetime import datetime
from typing import List, Optional, Union
from pydantic import BaseModel

class SearchResult(BaseModel):
    url: str
    title: Optional[str] = None
    snippet: Optional[str] = None
    engine: Optional[str] = None
    score: Optional[float] = None
    extracted_at: str
class SearchError(BaseModel):
    query: str
    error: str
    extracted_at: str
    success: bool = False

class SearxNGSearch:
    def __init__(self, base_url: Optional[str] = None):
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL", "http://localhost:8888/search")
        if not self.base_url:
            raise ValueError("SEARXNG_BASE_URL is not set")

    async def _fetch_page(
        self,
        client: httpx.AsyncClient,
        query: str,
        page: int,
        timeout: int,
    ) -> Union[List[dict], str]:
        try:
            params = {
                "q": query,
                "format": "json",
                "pageno": page,
            }
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                "Accept": "application/json",
            }
            r = await client.get(self.base_url, params=params, timeout=timeout, headers=headers)
            r.raise_for_status()
            return r.json().get("results", [])
        except Exception as e:
            return f"Page {page} failed: {e}"

    async def _search_async(
        self,
        query: str,
        max_pages: int,
        timeout: int,
        max_results: Optional[int],
        output_for_llm: bool,
    ) -> Union[List[SearchResult], str, SearchError]:

        collected: List[SearchResult] = []

        async with httpx.AsyncClient() as client:
            tasks = [
                self._fetch_page(client, query, page, timeout)
                for page in range(1, max_pages + 1)
            ]
            pages = await asyncio.gather(*tasks)

        for page in pages:
            if isinstance(page, str):
                return SearchError(
                    query=query,
                    error=page,
                    extracted_at=datetime.now().isoformat(),
                )

            for item in page:
                collected.append(
                    SearchResult(
                        url=item.get("url"),
                        title=item.get("title"),
                        snippet=item.get("content"),  # SearxNG snippet
                        engine=item.get("engine"),
                        score=item.get("score"),
                        extracted_at=datetime.now().isoformat(),
                    )
                )

        if max_results:
            collected.sort(
                key=lambda x: x.score if x.score is not None else 0,
                reverse=True,
            )
            collected = collected[:max_results]

        if output_for_llm:
            return "\n".join(r.model_dump_json() for r in collected)

        return collected

    async def asearch(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ):
        return await self._search_async(
            query=query,
            max_pages=max_pages,
            timeout=timeout,
            max_results=max_results,
            output_for_llm=output_for_llm,
        )

    def search(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ) -> Union[List[SearchResult], str, SearchError]:

        return asyncio.run(
            self._search_async(
                query=query,
                max_pages=max_pages,
                timeout=timeout,
                max_results=max_results,
                output_for_llm=output_for_llm,
            )
        )


In [27]:
class AgentState(TypedDict):
    messages: Annotated[list, operator.add]
    company: str
    indicator: str

class Controller:
    def __init__(self):
        self.llm = ChatOpenAI(
            base_url=LLM_URL,      
            api_key=OPENAI_API_KEY,  
            model=LLM_MODEL,
            temperature=0.2
        )
    
        self.app = self.build_graph()
        self.searcher = SearxNGSearch()

    async def llm_node(self, state: AgentState):
        company = state["company"]

        system_prompt = SystemMessage(
            content=(
                "You are a web relevance evaluation expert.\n"
                "Your task is to evaluate whether each provided URL is directly related "
                "to the given Japanese company.\n\n"
                "Evaluation rules:\n"
                "- Consider official websites, subsidiaries, IR pages, press releases, "
                "and credible news sources as relevant.\n"
                "- Mark unrelated companies, generic blogs, forums, or ads as not relevant.\n"
                "- Be concise and objective.\n\n"
                "Output format:\n"
                "- URL\n"
                "- Relevance: High / Medium / Low / None\n"
                "- Short justification (1 sentence)"
            )
        )

        urls_text = "\n".join(
            msg.content for msg in state["messages"] if isinstance(msg, HumanMessage)
        )
        
        print(urls_text)

        human_prompt = HumanMessage(
            content=(
                f"Company: {company}\n\n"
                "URLs to evaluate:\n"
                f"{urls_text}"
            )
        )

        messages = [system_prompt, human_prompt]

        response = await self.llm.ainvoke(messages)

        return {
            "messages": [response.content],
        }

    
    async def search_node(
        self,
        state: AgentState,
        max_pages: int = 3,
        max_results: int = 5,
    ):
        company = state["company"]
        indicator = state["indicator"]

        query = f"{company} {indicator}"
        print(f"[SEARCH] Query: {query}")

        results = await self.searcher.asearch(
            query=query,
            max_pages=max_pages,
            max_results=max_results,
            output_for_llm=False,
        )
        print("results",results)

        urls = []
        for r in results:
            urls.append(r.url)

        urls_text = "\n".join(urls)
        print("urls_text",urls_text)

        return {
            "messages": [
                HumanMessage(
                    content=(
                        "The following URLs were retrieved from web search:\n"
                        f"{urls_text}"
                    )
                )
            ],
            "search_results": results,
        }


    def build_graph(self):
        graph = StateGraph(AgentState)

        graph.add_node("searcher", self.search_node)
        graph.add_node("llm_node", self.llm_node)

        graph.set_entry_point("searcher")
        graph.add_edge("searcher", "llm_node")
        graph.add_edge("llm_node", END)

        app = graph.compile()

        return app
    
    async def run(self, user_input: str, company: str, indicator: str):
        initial_state: AgentState = {
            "messages": [HumanMessage(content=user_input)] if user_input else [],
            "company": company,
            "indicator": indicator,
        }

        result = await self.app.ainvoke(initial_state)
        
        return result


In [33]:
controller = Controller()

user_input = ""
company = "鹿島建設"
indicator = "株主優待の有無・内容"

result = await controller.run(
    user_input = user_input,
    company=company,
    indicator=indicator
)
last_message = result["messages"][-1]
print(last_message)


[SEARCH] Query: 鹿島建設 株主優待の有無・内容
results []
urls_text 
The following URLs were retrieved from web search:

I notice that you've mentioned "The following URLs were retrieved from web search:" but haven't provided any actual URLs for evaluation. 

To properly assess the relevance of URLs to 鹿島建設 (Kajima Corporation), I need the specific URLs to analyze. Please provide the list of URLs you'd like me to evaluate for their relevance to this Japanese construction company.

Once you share the URLs, I'll evaluate each one based on whether it's directly related to 鹿島建設, considering factors like official websites, subsidiaries, IR pages, press releases, and credible news sources.
