In [46]:
"""
web_content_loader.py

A module for loading web content using LlamaIndex's various web readers.
This module focuses solely on extracting content from web pages without LLM interaction.
"""

try:
    from llama_index.readers.web import SimpleWebPageReader, BeautifulSoupWebReader, TrafilaturaWebReader
    READERS_AVAILABLE = True
except ImportError:
    READERS_AVAILABLE = False
    print("Some web readers are not available. Please install llama-index-readers-web and additional dependencies.")

class WebContentLoader:
    """
    A class to handle web content loading using LlamaIndex's various web readers.
    """
    
    def __init__(self, preferred_reader="beautifulsoup"):
        """
        Initialize the WebContentLoader with a preferred web reader.
        
        Args:
            preferred_reader (str): The preferred reader to use ("trafilatura", "beautifulsoup", or "simple").
        """
        if not READERS_AVAILABLE:
            raise ImportError("Web readers are not installed. Install llama-index-readers-web and dependencies.")
            
        self.preferred_reader = preferred_reader.lower()
        self.readers = {}
        
        # Initialize readers based on availability and compatibility
        try:
            self.readers["simple"] = SimpleWebPageReader()
        except Exception as e:
            print(f"Failed to initialize SimpleWebPageReader: {str(e)}")
            
        try:
            self.readers["beautifulsoup"] = BeautifulSoupWebReader()
        except Exception as e:
            print(f"Failed to initialize BeautifulSoupWebReader: {str(e)}")
            
        try:
            self.readers["trafilatura"] = TrafilaturaWebReader()
        except Exception as e:
            print(f"Failed to initialize TrafilaturaWebReader: {str(e)}")
            
        if not self.readers:
            raise ValueError("No web readers could be initialized. Check your LlamaIndex installation.")
            
        if self.preferred_reader not in self.readers:
            self.preferred_reader = list(self.readers.keys())[0]
            print(f"Preferred reader {preferred_reader} not available. Falling back to {self.preferred_reader}.")
            
        print(f"WebContentLoader initialized with {self.preferred_reader} as preferred reader.")
        print(f"Available readers: {list(self.readers.keys())}")
    
    def load_web_content(self, url):
        """
        Load content from a specified web URL using the preferred web reader.
        If the preferred reader fails, it will try other available readers as fallback.
        
        Args:
            url (str): The URL of the web page to load.
            
        Returns:
            list: List of loaded documents.
        """
        reader_order = [self.preferred_reader] + [r for r in self.readers.keys() if r != self.preferred_reader]
        for reader_name in reader_order:
            try:
                reader = self.readers[reader_name]
                documents = reader.load_data(urls=[url])
                print(f"Successfully loaded content from {url} using {reader_name} reader.")
                return documents
            except Exception as e:
                print(f"Error loading content from {url} with {reader_name} reader: {str(e)}")
                if reader_name == reader_order[-1]:  # Last reader tried
                    raise Exception(f"All readers failed to load content from {url}")
        return []
            
    def save_content_to_file(self, documents, output_file):
        """
        Save the loaded documents to a text file for further processing, including full content.
        
        Args:
            documents (list): List of documents loaded from the web.
            output_file (str): Path to the output file where content will be saved.
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                for i, doc in enumerate(documents):
                    content = str(doc)
                    f.write(f"Document {i+1} (Full Content):\n")
                    f.write(content)  # Save full content to file
                    f.write("\n\n" + "="*50 + "\n\n")
                    print(f"Document {i+1}: Saved {len(content)} characters to file.")
            print(f"Full content saved to {output_file}")
        except Exception as e:
            print(f"Error saving content to file: {str(e)}")
            raise
            
    def process_web_content(self, url, output_file=None):
        """
        A convenience method to load web content and optionally save it to a file.
        
        Args:
            url (str): The URL of the web page to process.
            output_file (str, optional): Path to save the content. Defaults to None.
            
        Returns:
            list: List of loaded documents.
        """
        # Load content
        documents = self.load_web_content(url)
        
        # Save to file if specified
        if output_file:
            self.save_content_to_file(documents, output_file)
            
        return documents


if __name__ == "__main__":
    # Example usage of the module
    try:
        # Initialize the WebContentLoader instance with Trafilatura as the preferred reader
        web_loader = WebContentLoader(preferred_reader="beautifulsoup")
        
        # Define the URL to scrape (Abraham Lincoln's Wikipedia page)
        URL = "https://docs.tavily.com/documentation/api-reference/endpoint/crawl"
        
        # Process the web content and save it to a file
        output_path = "abraham_lincoln_full_content.txt"
        documents = web_loader.process_web_content(URL, output_path)
        
        # Print a summary of loaded documents
        print(f"Loaded {len(documents)} document(s) from {URL}")
        for i, doc in enumerate(documents):
            content = str(doc)
            print(f"Document {i+1}: Contains {len(content)} characters.")
            print(f"Full content for Document {i+1} has been saved to {output_path}")
            # Display a short preview to avoid console overload
            preview = content
            print(len(preview))
            print("\n" + "="*50 + "\n")
            print(f"Document {i+1} Preview (first 500 characters):\n{preview}")
            print("\n" + "="*50 + "\n")
            
    except Exception as e:
        print(f"An error occurred: {str(e)}")



WebContentLoader initialized with beautifulsoup as preferred reader.
Available readers: ['simple', 'beautifulsoup', 'trafilatura']
Successfully loaded content from https://docs.tavily.com/documentation/api-reference/endpoint/crawl using beautifulsoup reader.
Document 1: Saved 354 characters to file.
Full content saved to abraham_lincoln_full_content.txt
Loaded 1 document(s) from https://docs.tavily.com/documentation/api-reference/endpoint/crawl
Document 1: Contains 354 characters.
Full content for Document 1 has been saved to abraham_lincoln_full_content.txt
354


Document 1 Preview (first 500 characters):
Doc ID: fdef417a-b71d-4c09-9587-cf4b9414dbb6
Text: Vercel Security Checkpoint                  We're verifying your
browser  Website owner? Click here to fix       Vercel Security
Checkpoint | hkg1::1754364685-YcTcPfslSGlwwwoAak7U8ZcZldh1GONE
Enable JavaScript to continue    Vercel Security Checkpoint |
hkg1::1754364685-YcTcPfslSGlwwwoAak7U8ZcZldh1GONE




In [None]:
import asyncio
from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI

class UniversalWebDataExtractor:
    def __init__(self):
        self.readers = {
            'static_web': SimpleWebPageReader(html_to_text=True),
            'dynamic_web': SpiderWebReader(api_key="sk-0139f5aa-b748-429b-97d3-fdb5b836e150", mode="scrape"),
            'ecommerce': OxylabsAmazonSearchReader("user", "pass"),
            'search': OxylabsGoogleSearchReader("user", "pass"),
            'documents': DoclingReader()
        }
    
    async def extract_web_content(self, urls):
        """擷取網頁內容"""
        web_docs = self.readers['static_web'].load_data(urls)
        return web_docs
    
    async def extract_dynamic_content(self, url):
        """擷取動態網站內容"""
        dynamic_docs = self.readers['dynamic_web'].load_data(url=url)
        return dynamic_docs
    
    async def search_and_extract(self, query):
        """搜尋並擷取相關內容"""
        search_docs = self.readers['search'].load_data({
            'query': query,
            'parse': True
        })
        return search_docs
    
    def build_rag_system(self, documents):
        """建構 RAG 系統"""
        index = VectorStoreIndex.from_documents(documents)
        query_engine = index.as_query_engine(
            llm=OpenAI(model="gpt-4"),
            similarity_top_k=5
        )
        return query_engine

# 使用範例
async def main():
    extractor = UniversalWebDataExtractor()
    
    # 擷取多種來源數據
    web_docs = await extractor.extract_web_content([
        "https://docs.llamaindex.ai",
        "https://python.langchain.com"
    ])
    
    search_docs = await extractor.search_and_extract(
        "LlamaIndex web scraping tutorial"
    )
    
    # 合併所有文檔並建構 RAG
    all_docs = web_docs + search_docs
    rag_system = extractor.build_rag_system(all_docs)
    
    # 查詢測試
    response = rag_system.query("How to scrape dynamic websites with LlamaIndex?")
    print(response)

# 執行
asyncio.run(main())



## crawl

In [53]:
import os
from tavily import TavilyClient
from dotenv import load_dotenv
from typing import Any, Dict, List

load_dotenv()

tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
response = tavily_client.crawl("https://docs.tavily.com", instructions="Find all pages on the Python SDK")

print(response)




def extract_sdk_urls(response: Dict[str, Any]) -> List[str]:
    """
    從 TavilyClient.crawl 的回傳結果中擷取所有 URL。
    
    :param response: crawl() 的回傳字典
    :return: 包含所有頁面 URL 的清單
    """
    results = response.get('results', [])
    return [page for page in results if 'url' in page]

# 假設 response 已經是你列印出來的那個 dict
sdk_pages = extract_sdk_urls(response)
for url in sdk_pages:
    print(url)



{'url': 'https://docs.tavily.com/sdk/python/quick-start', 'raw_content': 'Quickstart - Tavily Docs\n\n[Tavily Docs home page![light logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/)\n\nSearch...\n\nCtrl KAsk AI\n\n- [Support](mailto:support@tavily.com)\n- [Get an API key](https://app.tavily.com)\n- [Get an API key](https://app.tavily.com)\n\nSearch...\n\nNavigation\n\nPython\n\nQuickstart\n\n[Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/chat)[FAQ](/faq/faq)[Changelog](/changelog/changelog)\n\n- [API Playground](https://app.tavily.com/playground)\n- [Community](https://community.tavily.com)\n- [Blog](https://blog.tavily.com)\n\n##### Python\n\n- [Quickstart](/sdk/python/quick-start)\n- [SDK Reference](/sdk/python/reference)\n\n##### JavaScript\n\n- [Quickstart](/sdk/javascript/quick-start)\n- [

In [57]:
print(sdk_pages[0]['raw_content'])

Quickstart - Tavily Docs

[Tavily Docs home page![light logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/)

Search...

Ctrl KAsk AI

- [Support](mailto:support@tavily.com)
- [Get an API key](https://app.tavily.com)
- [Get an API key](https://app.tavily.com)

Search...

Navigation

Python

Quickstart

[Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/chat)[FAQ](/faq/faq)[Changelog](/changelog/changelog)

- [API Playground](https://app.tavily.com/playground)
- [Community](https://community.tavily.com)
- [Blog](https://blog.tavily.com)

##### Python

- [Quickstart](/sdk/python/quick-start)
- [SDK Reference](/sdk/python/reference)

##### JavaScript

- [Quickstart](/sdk/javascript/quick-start)
- [SDK Reference](/sdk/javascript/reference)

On this page

- [Introduction](#introduction)
- [Quickstart](#qui

## search

In [None]:
from tavily import TavilyClient

tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
response = tavily_client.search("陰屍路 有哪幾集?")

print(response)


# 假設 response 已經是你列印出來的那個 dict
sdk_pages = extract_sdk_urls(response)
for url in sdk_pages:
    print(url)

{'query': '陰屍路 有哪幾集?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://today.line.me/tw/v3/article/2DPvZZz', 'title': '永不完結！《陰屍路》兩部續作釋出預告，熟悉的主角瑞克', 'content': '《陰屍路：戴瑞迪克森》、《陰屍路：瑞克與米瓊恩》都是僅有6 集的迷你劇集，前者計畫在2023 年9 月10 日進行首播，後者則是要等到2024 年，喜歡《陰屍路》系列', 'score': 0.78709817, 'raw_content': None}, {'url': 'https://tw.news.yahoo.com/%E9%99%B0%E5%B1%8D%E8%B7%AF-%E5%BD%B1%E9%9B%86%E6%8C%87%E5%8D%97-100957316.html', 'title': '大結局了但故事還沒完！3部《陰屍路》續集衍生影集指南', 'content': '總共11季，177集，陪伴觀眾12年時光，經典喪屍影集《陰屍路》本週上演大結局了！有驚險刺激的交戰，熱淚盈眶的離別，還有溫馨感人的回憶殺收尾，讓觀眾再次見', 'score': 0.7493996, 'raw_content': None}, {'url': 'https://zh.wikipedia.org/zh-hant/%E9%99%B0%E5%B1%8D%E8%B7%AF%E9%9B%86%E6%95%B8%E5%88%97%E8%A1%A8', 'title': '陰屍路集數列表', 'content': '第十一季（2021-2022年） ; 13, 軍閥. Warlords, 洛倫·亞肯內利, 吉姆·巴恩斯& 艾瑞克·蒙特 ; 14, 萬惡之源. The Rotten Core, 馬庫斯·史托克斯（英語：Marcus Stokes）, 艾瑞克·', 'score': 0.68748367, 'raw_content': None}, {'url': 'https://zh.wikipedia.org/zh-hant/%E9%99%B0%E5%B1%8D%E8%B7%AF_(%E

In [61]:
sdk_pages

[{'url': 'https://today.line.me/tw/v3/article/2DPvZZz',
  'title': '永不完結！《陰屍路》兩部續作釋出預告，熟悉的主角瑞克',
  'content': '《陰屍路：戴瑞迪克森》、《陰屍路：瑞克與米瓊恩》都是僅有6 集的迷你劇集，前者計畫在2023 年9 月10 日進行首播，後者則是要等到2024 年，喜歡《陰屍路》系列',
  'score': 0.78709817,
  'raw_content': None},
 {'url': 'https://tw.news.yahoo.com/%E9%99%B0%E5%B1%8D%E8%B7%AF-%E5%BD%B1%E9%9B%86%E6%8C%87%E5%8D%97-100957316.html',
  'title': '大結局了但故事還沒完！3部《陰屍路》續集衍生影集指南',
  'content': '總共11季，177集，陪伴觀眾12年時光，經典喪屍影集《陰屍路》本週上演大結局了！有驚險刺激的交戰，熱淚盈眶的離別，還有溫馨感人的回憶殺收尾，讓觀眾再次見',
  'score': 0.7493996,
  'raw_content': None},
 {'url': 'https://zh.wikipedia.org/zh-hant/%E9%99%B0%E5%B1%8D%E8%B7%AF%E9%9B%86%E6%95%B8%E5%88%97%E8%A1%A8',
  'title': '陰屍路集數列表',
  'content': '第十一季（2021-2022年） ; 13, 軍閥. Warlords, 洛倫·亞肯內利, 吉姆·巴恩斯& 艾瑞克·蒙特 ; 14, 萬惡之源. The Rotten Core, 馬庫斯·史托克斯（英語：Marcus Stokes）, 艾瑞克·',
  'score': 0.68748367,
  'raw_content': None},
 {'url': 'https://zh.wikipedia.org/zh-hant/%E9%99%B0%E5%B1%8D%E8%B7%AF_(%E9%9B%BB%E8%A6%96%E5%8A%87)',
  'title': '陰屍路(電視劇) - 維基百科，自由的百科全書',

In [85]:
# tavily_search.py

import requests, os
from typing import Optional, List, Dict, Any, Union
from dotenv import load_dotenv

load_dotenv()

def tavily_search(
    api_url: str,
    api_key: str,
    query: str,
    auto_parameters: bool = False,
    topic: str = "general",
    search_depth: str = "basic",
    chunks_per_source: Optional[int] = None,
    max_results: Optional[int] = None,
    time_range: Optional[str] = None,
    days: Optional[int] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    include_answer: Optional[Union[bool, str]] = None,
    include_raw_content: Optional[Union[bool, str]] = None,
    include_images: bool = False,
    include_image_descriptions: bool = False,
    include_favicon: bool = False,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    country: Optional[str] = None,
) -> Dict[str, Any]:
    """
    發送 Tavily Search API 請求並回傳解析後的結果。

    Args:
        api_url (str): Tavily Search API 端點網址。
        api_key (str): API 金鑰。
        query (str): 查詢字串（必填）。
        auto_parameters (bool): 是否自動設定參數（預設為 False）。
        topic (str): 搜尋主題，'general' 或 'news'（預設 'general'）。
        search_depth (str): 搜尋深度，'basic' 或 'advanced'（預設 'basic'）。
        chunks_per_source (Optional[int]): 單一來源最多回傳幾個片段（1~3，僅 advanced 有效）。
        max_results (Optional[int]): 最多回傳幾筆結果（0~20）。
        time_range (Optional[str]): 結果時間範圍（如 'day', 'week', 'month', 'year', 'd', 'w', 'm', 'y'）。
        days (Optional[int]): 回溯天數（僅 topic 為 news 有效）。
        start_date (Optional[str]): 查詢起始日期（YYYY-MM-DD）。
        end_date (Optional[str]): 查詢結束日期（YYYY-MM-DD）。
        include_answer (Optional[Union[bool, str]]): 是否包含 LLM 生成答案（True/'basic'/'advanced'）。
        include_raw_content (Optional[Union[bool, str]]): 是否包含原始內容（True/'markdown'/'text'）。
        include_images (bool): 是否搜尋圖片。
        include_image_descriptions (bool): 圖片是否附說明。
        include_favicon (bool): 是否回傳 favicon。
        include_domains (Optional[List[str]]): 限定搜尋的網域。
        exclude_domains (Optional[List[str]]): 排除搜尋的網域。
        country (Optional[str]): 優先顯示特定國家內容（僅 general 有效）。

    Returns:
        Dict[str, Any]: Tavily API 回傳的 JSON 結果，包含 query, answer, images, results, auto_parameters, response_time 等欄位。

    Raises:
        requests.HTTPError: 若 API 請求失敗時拋出。
    """
    payload = {
        "query": query,
        "auto_parameters": auto_parameters,
        "topic": topic,
        "search_depth": search_depth,
        "include_images": include_images,
        "include_image_descriptions": include_image_descriptions,
        "include_favicon": include_favicon,
    }
    # 可選參數動態加入
    if chunks_per_source is not None:
        payload["chunks_per_source"] = chunks_per_source
    if max_results is not None:
        payload["max_results"] = max_results
    if time_range is not None:
        payload["time_range"] = time_range
    if days is not None:
        payload["days"] = days
    if start_date is not None:
        payload["start_date"] = start_date
    if end_date is not None:
        payload["end_date"] = end_date
    if include_answer is not None:
        payload["include_answer"] = include_answer
    if include_raw_content is not None:
        payload["include_raw_content"] = include_raw_content
    if include_domains is not None:
        payload["include_domains"] = include_domains
    if exclude_domains is not None:
        payload["exclude_domains"] = exclude_domains
    if country is not None:
        payload["country"] = country

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.post(api_url, json=payload, headers=headers)
    response.raise_for_status()
    return response.json()

def format_tavily_result(result: dict) -> str:
    """
    將 Tavily 搜尋結果格式化成結構化文本，包含：
      1. 摘要回答（answer）
      2. 全域相關圖片（result['images']）
      3. 各筆搜尋結果及其內嵌圖片（item['images']）
    """
    parts: List[str] = []

    # 1. 摘要回答
    answer = result.get("answer")
    if answer:
        parts.append("=== 摘要回答 ===")
        parts.append(answer.strip())
        parts.append("")

    # 2. 全域相關圖片
    global_images = result.get("images", [])
    if global_images:
        parts.append("=== 全域相關圖片 ===")
        for i, img in enumerate(global_images, 1):
            parts.append(f"[圖片 {i}]")
            parts.append(f"  標題：{img.get('title', 'N/A')}")
            parts.append(f"  描述：{img.get('description', 'N/A')}")
            parts.append(f"  URL：{img.get('url', 'N/A')}")
        parts.append("")

    # 3. 各筆搜尋結果
    parts.append("=== 搜尋結果 ===")
    for idx, item in enumerate(result.get("results", []), 1):
        parts.append(f"-- 來源 {idx} --")
        parts.append(f"標題：{item.get('title', 'N/A')}")
        parts.append(f"網址：{item.get('url', 'N/A')}")
        content = item.get("content")
        if content:
            parts.append("內容摘要：")
            parts.append(content.strip())
        raw = item.get("raw_content")
        if raw:
            parts.append("詳細全文：")
            parts.append(raw.strip())

    return "\n".join(parts)



# 範例使用（請填入你的 API_URL 及 API_KEY）
if __name__ == "__main__":
    API_URL = "https://api.tavily.com/search"
    API_KEY = os.getenv("TAVILY_API_KEY")
    result = tavily_search(
        api_url=API_URL,
        api_key=API_KEY,
        query="2025 網球大滿貫是誰",
        include_answer=True,
        max_results=1,
        include_favicon=True,
        include_raw_content=True,
        search_depth="advanced",
        include_images=True,
        include_image_descriptions=True,
    )
    import json
    print(json.dumps(result, ensure_ascii=False, indent=2))

# 使用範例
# formatted_content = format_tavily_result(result)
# print(formatted_content)


{
  "query": "2025 網球大滿貫是誰",
  "follow_up_questions": null,
  "answer": "扬尼克·辛纳赢得了2025年温网大满贯冠军。他击败了卡洛斯·阿尔卡拉斯。这是辛纳的第四个大满贯冠军。",
  "images": [
    {
      "url": "https://chinese.aljazeera.net/wp-content/uploads/2025/07/AFP__20250713__66ND33Y__v2__HighRes__TopshotTennisGbrWimbledon-1752435673-1752471587.webp?resize=770%2C513&quality=80",
      "title": "辛纳击败阿尔卡拉斯，赢得2025年温网大满贯冠军| 体育新闻| 半岛电视台",
      "description": "A young male tennis player dressed in a white jacket with a small logo on the chest holds and kisses a large gold trophy."
    },
    {
      "url": "https://cms-emer-res.cctvnews.cctv.com/cctv/video/20250722/c6cebca773e94a669539f1150fa0568c/20250722072355418.png",
      "title": "首次缺席大满贯！郑钦文因伤退出2025美网",
      "description": "一位穿着运动装备、手握网球拍、微笑着向外挥手的女网球运动员站在场上。"
    },
    {
      "url": "https://assets.juksy.com/files/articles/134793/6873f5b2ed88f.jpg",
      "title": "Jannik Sinner 擊敗Carlos Alcaraz 贏下2025 溫網男單冠軍！生涯第4 座 ...",
      "description": "A tennis player is preparing to

In [91]:
# tavily_search.py

# 修復 SQLite 版本兼容性 - 必須在導入 CrewAI 之前執行
import sys
try:
    import pysqlite3.dbapi2 as sqlite3
    sys.modules['sqlite3'] = sqlite3
    sys.modules['sqlite3.dbapi2'] = sqlite3
    print(f"✅ 成功啟用 pysqlite3，SQLite 版本: {sqlite3.sqlite_version}")
except ImportError:
    import sqlite3
    print(f"⚠️  使用系統 SQLite，版本: {sqlite3.sqlite_version}")

# 其他標準庫導入
import requests
import os
from typing import Optional, List, Dict, Any, Union, Type
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# CrewAI 導入 - 必須在 SQLite 修復之後
from crewai.tools import BaseTool

load_dotenv()


class TavilySearchInput(BaseModel):
    """Input schema for TavilySearchTool."""
    query: str = Field(..., description="The search query string.")
    topic: str = Field(default="general", description="Search topic: 'general' or 'news'.")
    search_depth: str = Field(default="basic", description="Search depth: 'basic' or 'advanced'.")
    max_results: Optional[int] = Field(default=5, description="Maximum number of results to return (0-20).")
    include_answer: Optional[bool] = Field(default=True, description="Whether to include LLM-generated answer.")
    include_raw_content: Optional[bool] = Field(default=False, description="Whether to include raw content.")
    include_images: bool = Field(default=False, description="Whether to search for images.")
    time_range: Optional[str] = Field(default=None, description="Time range for results (day, week, month, year).")

def tavily_search(
    api_url: str,
    api_key: str,
    query: str,
    auto_parameters: bool = False,
    topic: str = "general",
    search_depth: str = "basic",
    chunks_per_source: Optional[int] = None,
    max_results: Optional[int] = None,
    time_range: Optional[str] = None,
    days: Optional[int] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    include_answer: Optional[Union[bool, str]] = None,
    include_raw_content: Optional[Union[bool, str]] = None,
    include_images: bool = False,
    include_image_descriptions: bool = False,
    include_favicon: bool = False,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    country: Optional[str] = None,
) -> Dict[str, Any]:
    """
    發送 Tavily Search API 請求並回傳解析後的結果。

    Args:
        api_url (str): Tavily Search API 端點網址。
        api_key (str): API 金鑰。
        query (str): 查詢字串（必填）。
        auto_parameters (bool): 是否自動設定參數（預設為 False）。
        topic (str): 搜尋主題，'general' 或 'news'（預設 'general'）。
        search_depth (str): 搜尋深度，'basic' 或 'advanced'（預設 'basic'）。
        chunks_per_source (Optional[int]): 單一來源最多回傳幾個片段（1~3，僅 advanced 有效）。
        max_results (Optional[int]): 最多回傳幾筆結果（0~20）。
        time_range (Optional[str]): 結果時間範圍（如 'day', 'week', 'month', 'year', 'd', 'w', 'm', 'y'）。
        days (Optional[int]): 回溯天數（僅 topic 為 news 有效）。
        start_date (Optional[str]): 查詢起始日期（YYYY-MM-DD）。
        end_date (Optional[str]): 查詢結束日期（YYYY-MM-DD）。
        include_answer (Optional[Union[bool, str]]): 是否包含 LLM 生成答案（True/'basic'/'advanced'）。
        include_raw_content (Optional[Union[bool, str]]): 是否包含原始內容（True/'markdown'/'text'）。
        include_images (bool): 是否搜尋圖片。
        include_image_descriptions (bool): 圖片是否附說明。
        include_favicon (bool): 是否回傳 favicon。
        include_domains (Optional[List[str]]): 限定搜尋的網域。
        exclude_domains (Optional[List[str]]): 排除搜尋的網域。
        country (Optional[str]): 優先顯示特定國家內容（僅 general 有效）。

    Returns:
        Dict[str, Any]: Tavily API 回傳的 JSON 結果，包含 query, answer, images, results, auto_parameters, response_time 等欄位。

    Raises:
        requests.HTTPError: 若 API 請求失敗時拋出。
    """
    payload = {
        "query": query,
        "auto_parameters": auto_parameters,
        "topic": topic,
        "search_depth": search_depth,
        "include_images": include_images,
        "include_image_descriptions": include_image_descriptions,
        "include_favicon": include_favicon,
    }
    # 可選參數動態加入
    if chunks_per_source is not None:
        payload["chunks_per_source"] = chunks_per_source
    if max_results is not None:
        payload["max_results"] = max_results
    if time_range is not None:
        payload["time_range"] = time_range
    if days is not None:
        payload["days"] = days
    if start_date is not None:
        payload["start_date"] = start_date
    if end_date is not None:
        payload["end_date"] = end_date
    if include_answer is not None:
        payload["include_answer"] = include_answer
    if include_raw_content is not None:
        payload["include_raw_content"] = include_raw_content
    if include_domains is not None:
        payload["include_domains"] = include_domains
    if exclude_domains is not None:
        payload["exclude_domains"] = exclude_domains
    if country is not None:
        payload["country"] = country

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.post(api_url, json=payload, headers=headers)
    response.raise_for_status()
    return response.json()

def format_tavily_result(result: dict) -> str:
    """
    將 Tavily 搜尋結果格式化成結構化文本，包含：
      1. 摘要回答（answer）
      2. 全域相關圖片（result['images']）
      3. 各筆搜尋結果及其內嵌圖片（item['images']）
    """

    try:
        parts: List[str] = []

        # 1. 摘要回答
        answer = result.get("answer")
        if answer:
            parts.append("=== 摘要回答 ===")
            parts.append(str(answer).strip())
            parts.append("")

        # 2. 全域相關圖片
        global_images = result.get("images", [])
        if global_images and isinstance(global_images, list):
            parts.append("=== 全域相關圖片 ===")
            for i, img in enumerate(global_images, 1):
                if isinstance(img, dict):
                    parts.append(f"[圖片 {i}]")
                    parts.append(f"  標題：{img.get('title', 'N/A')}")
                    parts.append(f"  描述：{img.get('description', 'N/A')}")
                    parts.append(f"  URL：{img.get('url', 'N/A')}")
                else:
                    parts.append(f"[圖片 {i}] {str(img)}")
            parts.append("")

        # 3. 各筆搜尋結果
        parts.append("=== 搜尋結果 ===")
        results = result.get("results", [])
        if isinstance(results, list):
            for idx, item in enumerate(results, 1):
                parts.append(f"-- 來源 {idx} --")
                if isinstance(item, dict):
                    parts.append(f"標題：{item.get('title', 'N/A')}")
                    parts.append(f"網址：{item.get('url', 'N/A')}")
                    content = item.get("content")
                    if content:
                        parts.append("內容摘要：")
                        parts.append(str(content).strip())
                    raw = item.get("raw_content")
                    if raw:
                        parts.append("詳細全文：")
                        parts.append(str(raw).strip())
                else:
                    parts.append(f"結果：{str(item)}")
                parts.append("")
        else:
            parts.append(f"搜尋結果格式異常：{str(results)}")

        return "\n".join(parts)
        
    except Exception as e:
        return f"格式化結果時發生錯誤：{str(e)}\n原始結果：{str(result)}"


class TavilySearchTool(BaseTool):
    """CrewAI tool for performing web searches using Tavily Search API."""
    
    name: str = "TavilySearchTool"
    description: str = (
        "Performs web searches using Tavily Search API. "
        "Can search for general information or news, with options for basic or advanced search depth. "
        "Returns formatted results including summary answers, images, and detailed search results."
    )
    args_schema: Type[BaseModel] = TavilySearchInput

    def _run(
        self,
        query: str,
        topic: str = "general",
        search_depth: str = "basic",
        max_results: Optional[int] = 5,
        include_answer: Optional[bool] = True,
        include_raw_content: Optional[bool] = False,
        include_images: bool = False,
        time_range: Optional[str] = None,
        include_image_descriptions: bool = False,
    ) -> str:
        """
        Execute the Tavily search and return formatted results.
        
        Args:
            query: The search query string
            topic: Search topic ('general' or 'news')
            search_depth: Search depth ('basic' or 'advanced')
            max_results: Maximum number of results to return
            include_answer: Whether to include LLM-generated answer
            include_raw_content: Whether to include raw content
            include_images: Whether to search for images
            time_range: Time range for results
            
        Returns:
            Formatted search results as a string
        """
        try:
            # Get API credentials
            api_url = "https://api.tavily.com/search"
            api_key = os.getenv("TAVILY_API_KEY")
            
            if not api_key:
                return "Error: TAVILY_API_KEY not found in environment variables."
            
            # Perform the search
            print(f"🔍 開始搜尋: {query}")
            result = tavily_search(
                api_url=api_url,
                api_key=api_key,
                query=query,
                topic=topic,
                search_depth=search_depth,
                max_results=max_results,
                include_answer=include_answer,
                include_raw_content=include_raw_content,
                include_images=include_images,
                include_image_descriptions=include_image_descriptions,
                time_range=time_range,
            )
            
            print(f"📄 搜尋結果類型: {type(result)}")
            if isinstance(result, dict):
                print("✅ 結果格式正確，開始格式化...")
                return format_tavily_result(result)
            else:
                print(f"❌ 結果格式錯誤: {result}")
                return f"Error: Invalid search result format: {result}"
            
        except requests.exceptions.HTTPError as http_err:
            return f"Error: HTTP error occurred: {http_err}"
        except Exception as e:
            return f"An error occurred during search: {e}"


# 範例使用（CrewAI Tool）
if __name__ == "__main__":
    # 檢查 API Key
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        print("⚠️  請設置 TAVILY_API_KEY 環境變數才能進行搜尋測試")
        print("範例：export TAVILY_API_KEY=your_api_key")
        print("\n✅ CrewAI Tool 類別建立成功！")
        print("TavilySearchTool 已準備就緒，只需要設置 API Key 即可使用。")
    else:
        # 使用 CrewAI Tool
        search_tool = TavilySearchTool()
        
        print(f"🔑 使用 API Key: {api_key[:10]}...")
        
        # 執行搜尋
        result = search_tool._run(
            query="2025 網球大滿貫是誰",
            search_depth="advanced",
            max_results=5,
            include_answer=True,
            include_raw_content=True,
            include_images=True,
            include_image_descriptions=True
        )
        
        print("=== CrewAI Tool 搜尋結果 ===")
        print(result)


✅ 成功啟用 pysqlite3，SQLite 版本: 3.46.1
🔑 使用 API Key: tvly-QSfH7...
🔍 開始搜尋: 2025 網球大滿貫是誰
📄 搜尋結果類型: <class 'dict'>
✅ 結果格式正確，開始格式化...
=== CrewAI Tool 搜尋結果 ===
=== 摘要回答 ===
Jannik Sinner won the 2025 Wimbledon men's singles title. The tournament took place from June 30 to July 13 in London. Sinner defeated defending champion Carlos Alcaraz in the final.

=== 全域相關圖片 ===
[圖片 1]
  標題：辛纳击败阿尔卡拉斯，赢得2025年温网大满贯冠军| 体育新闻| 半岛电视台
  描述：A young male tennis player dressed in a white jacket with a small logo on the chest holds and kisses a large gold trophy.
  URL：https://chinese.aljazeera.net/wp-content/uploads/2025/07/AFP__20250713__66ND33Y__v2__HighRes__TopshotTennisGbrWimbledon-1752435673-1752471587.webp?resize=770%2C513&quality=80
[圖片 2]
  標題：2025法國網球公開賽#倒數10天     台灣百搭天后#謝淑薇- 2025年開季 ...
  描述：None
  URL：https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=1016038577384562
[圖片 3]
  標題：Jannik Sinner 擊敗Carlos Alcaraz 贏下2025 溫網男單冠軍！生涯第4 座 ...
  描述：A tennis player is preparing to hit the ball on the 