# In [12]:  (Jupyter cell prompt left over from the notebook export)
import os
import asyncio
import httpx
from datetime import datetime
from typing import List, Optional, Union
from pydantic import BaseModel


# =========================
# Data model (Search only)
# =========================

class SearchResult(BaseModel):
    """A single search hit built from one item of a SearxNG JSON response."""

    # Address of the hit; the only required field besides the timestamp.
    url: str
    # Title of the hit, when the response item carries one.
    title: Optional[str] = None
    # Text excerpt; filled from the response item's "content" key.
    snippet: Optional[str] = None
    # Name of the engine reported for this hit.
    engine: Optional[str] = None
    # Score reported for this hit; used for ranking when results are capped.
    score: Optional[float] = None
    # ISO-8601 string from datetime.now() taken when the record was created.
    extracted_at: str


class SearchError(BaseModel):
    """Failure record returned instead of results when a page request fails."""

    # The query string that was being searched.
    query: str
    # Human-readable failure message (e.g. "Page 2 failed: ...").
    error: str
    # ISO-8601 string from datetime.now() taken when the error was recorded.
    extracted_at: str
    # Always False by default, so callers can test `.success` on the outcome.
    success: bool = False


# =========================
# SearxNG Search Step 1
# =========================

class SearxNGSearch:
    """Client for the JSON search endpoint of a SearxNG instance.

    Result pages are requested concurrently. If any page request fails, the
    whole search is reported as a single ``SearchError`` and results from the
    other pages are discarded.
    """

    def __init__(self, base_url: Optional[str] = None):
        """Store the endpoint URL.

        Args:
            base_url: Full URL of the search endpoint. When omitted or empty,
                the ``SEARXNG_BASE_URL`` environment variable is used, falling
                back to ``http://localhost:8888/search``.

        Raises:
            ValueError: If the resolved URL is empty (for example when the
                environment variable is set to an empty string).
        """
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL", "http://localhost:8888/search")
        if not self.base_url:
            raise ValueError("SEARXNG_BASE_URL is not set")

    async def _fetch_page(
        self,
        client: httpx.AsyncClient,
        query: str,
        page: int,
        timeout: int,
    ) -> Union[List[dict], str]:
        """Fetch one result page.

        Args:
            client: Shared HTTP client used for the request.
            query: Search query string.
            page: 1-based page number sent as ``pageno``.
            timeout: Per-request timeout in seconds.

        Returns:
            The list under the response's ``"results"`` key (empty list when
            the key is absent), or an error message string if anything went
            wrong while requesting or decoding the page.
        """
        try:
            params = {
                "q": query,
                "format": "json",
                "pageno": page,
            }
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                "Accept": "application/json",
            }
            r = await client.get(self.base_url, params=params, timeout=timeout, headers=headers)
            r.raise_for_status()
            return r.json().get("results", [])
        except Exception as e:
            # Deliberate catch-all: the failure is handed back as a message so
            # that asyncio.gather() in the caller never raises.
            return f"Page {page} failed: {e}"

    async def _search_async(
        self,
        query: str,
        max_pages: int,
        timeout: int,
        max_results: Optional[int],
        output_for_llm: bool,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Run the search across pages and shape the outcome.

        Args:
            query: Search query string.
            max_pages: Number of pages to request, starting at page 1.
            timeout: Per-request timeout in seconds.
            max_results: When not ``None``, results are sorted by score
                (highest first, missing scores count as 0) and truncated to
                this many entries. ``0`` yields no results; negative values
                are treated as ``0``. When ``None``, all results are returned
                in page order without sorting.
            output_for_llm: When true, return newline-separated JSON records
                instead of a list of ``SearchResult``.

        Returns:
            A list of ``SearchResult``, a newline-joined JSON string, or a
            ``SearchError`` describing the first failed page.
        """
        async with httpx.AsyncClient() as client:
            pages = await asyncio.gather(
                *(
                    self._fetch_page(client, query, page, timeout)
                    for page in range(1, max_pages + 1)
                )
            )

        # A string entry is the error message from _fetch_page; report the
        # first one (in page order) and drop everything else.
        for page in pages:
            if isinstance(page, str):
                return SearchError(
                    query=query,
                    error=page,
                    extracted_at=datetime.now().isoformat(),
                )

        collected: List[SearchResult] = []
        for page in pages:
            for item in page:
                url = item.get("url")
                if url is None:
                    # `url` is a required field on SearchResult; an item
                    # without one would abort the whole search with a
                    # validation error, so it is skipped instead.
                    continue
                collected.append(
                    SearchResult(
                        url=url,
                        title=item.get("title"),
                        snippet=item.get("content"),  # SearxNG snippet
                        engine=item.get("engine"),
                        score=item.get("score"),
                        extracted_at=datetime.now().isoformat(),
                    )
                )

        # Compare against None so that an explicit limit of 0 is honoured
        # rather than being mistaken for "no limit".
        if max_results is not None:
            collected.sort(
                key=lambda x: x.score if x.score is not None else 0,
                reverse=True,
            )
            collected = collected[: max(max_results, 0)]

        if output_for_llm:
            return "\n".join(r.model_dump_json() for r in collected)

        return collected

    async def asearch(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Search asynchronously; await this from code that already runs in an event loop.

        See ``_search_async`` for the meaning of the arguments and the shape
        of the return value.
        """
        return await self._search_async(
            query=query,
            max_pages=max_pages,
            timeout=timeout,
            max_results=max_results,
            output_for_llm=output_for_llm,
        )

    def search(
        self,
        query: str,
        *,
        max_pages: int = 3,
        timeout: int = 10,
        max_results: Optional[int] = None,
        output_for_llm: bool = False,
    ) -> Union[List[SearchResult], str, SearchError]:
        """Blocking counterpart of ``asearch``.

        Uses ``asyncio.run``, which raises ``RuntimeError`` when called from a
        thread that already has a running event loop (e.g. inside a notebook
        cell); use ``await asearch(...)`` there instead.
        """
        return asyncio.run(
            self.asearch(
                query,
                max_pages=max_pages,
                timeout=timeout,
                max_results=max_results,
                output_for_llm=output_for_llm,
            )
        )


# =========================
# Example usage (Step 1)
# =========================

async def main() -> None:
    """Run a sample query, print every hit, then print a bulleted list of snippets."""
    searcher = SearxNGSearch()

    results = await searcher.asearch(
        query="鹿島建設株主優待の有無・内容",
        max_pages=3,
        max_results=5,
        output_for_llm=False,  # keep SearchResult objects so fields can be read below
    )

    # A failed search comes back as a SearchError, not a list; report it and
    # stop rather than trying to iterate over it.
    if isinstance(results, SearchError):
        print(f"Search failed: {results.error}")
        return

    snippets = []
    for result in results:
        print(result)
        # `snippet` is optional; skip hits that have none instead of calling
        # .strip() on None.
        if result.snippet:
            snippets.append(result.snippet.strip())

    print("\n\n".join(f"- {s}" for s in snippets))


if __name__ == "__main__":
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        running_loop = None

    if running_loop is None:
        # Plain script: no event loop exists yet, so start one.
        asyncio.run(main())
    else:
        # A loop is already running (e.g. a notebook kernel), where
        # asyncio.run() would raise; schedule the demo on that loop instead.
        # The reference is kept so the task is not garbage-collected early.
        _demo_task = running_loop.create_task(main())



url='https://www.kajima.co.jp/ir/faq/?mode=pc' title='„Çà„Åè„ÅÇ„Çã„ÅîË≥™ÂïèÔºàFAQÔºâ | ÈπøÂ≥∂Âª∫Ë®≠Ê†™Âºè‰ºöÁ§æ' snippet='ÁèæÂú®„ÄÅÊ†™‰∏ªÂÑ™ÂæÖÂà∂Â∫¶„ÅØÂÆüÊñΩ„Åó„Å¶„Åä„Çä„Åæ„Åõ„Çì„ÄÇ „Éö„Éº„Ç∏„ÅÆ„Éà„ÉÉ„Éó„Å∏Êàª„Çã. Question Ê†™‰∏ªÈÇÑÂÖÉÊñπÈáù„ÄÅÈÖçÂΩìÈáë„ÅÆÊé®Áßª„Å´„Å§„ÅÑ„Å¶Áü•„Çä„Åü„ÅÑ„ÄÇ Answer. ÈÖçÂΩì\xa0...' engine='startpage' score=1.0 extracted_at='2026-01-08T15:08:16.923333'
url='https://www.jreast.co.jp/company/deal/border/' title='ÂúüÂú∞Â¢ÉÁïåÁ´ã‰ºö„ÅÑ„Å´„Å§„ÅÑ„Å¶ÔΩúÊ≥ï‰∫∫„ÅäÂèñÂºï„Å´Èñ¢„Åó„Å¶ - JRÊù±Êó•Êú¨' snippet='‰∫ãÂâçÊâì„Å°Âêà„Çè„Åõ„Åß„ÅØ„ÄÅÂΩìÁ§æ„Ç∞„É´„Éº„Éó‰ºöÁ§æÁ≠â„ÅåÁî≥Ë´ãÂÜÖÂÆπ„ÅÆÁ¢∫Ë™ç„ÇíË°å„ÅÑ„Åæ„Åô„ÄÇ „ÄêÂΩìÁ§æ„Ç∞„É´„Éº„Éó‰ºöÁ§æ„Äë ‚óãÈ¶ñÈÉΩÂúèÊú¨ÈÉ®„ÉªÊ®™ÊµúÊîØÁ§æ„ÉªÂÖ´ÁéãÂ≠êÊîØÁ§æ„ÉªÂ§ßÂÆÆÊîØÁ§æ„ÉªÈ´òÂ¥éÊîØÁ§æ„ÉªÊ∞¥Êà∏ÊîØÁ§æ„ÉªÂçÉËëâ\xa0...' engine='startpage' score=1.0 extracted_at='2026-01-08T15:08:16.923577'
url='https://doda.jp/DodaFront/View/JobSearchList/j_oc__020314S/-op__1/-preBtn__3/' title='„ÄêÁü•ÁöÑË≤°Áî£ÔºàÁü•Ë≤°Ôºâ„ÉªÁâπË®±„ÄëÊñ∞ÁùÄ„ÅÆËª¢ËÅ∑„ÉªÊ±Ç‰