In [7]:
import os
import csv
import datetime
import yt_dlp
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.agents import Tool, initialize_agent
from langchain_ollama import ChatOllama

# 以 ChatOllama 初始化 LLM，這裡使用 llama3.1 模型，溫度設定為 0
llm = ChatOllama(
    model="llama3.2:3b",
    temperature=0,
    # 可依需求設定其他參數
)

# 工具 1：從指定 YouTube 頻道 URL 擷取影片列表
def fetch_video_list(channel_url: str):
    ydl_opts = {
        'ignoreerrors': True,
        'quiet': True,
        'skip_download': True,
        'extract_flat': True  # 僅擷取影片列表，不下載影片內容
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(channel_url, download=False)
        videos = info.get('entries', [])
    return videos

# 工具 2：嘗試使用 youtube-transcript-api 取得影片逐字稿
def download_transcript(video_id: str):
    try:
        transcript_data = YouTubeTranscriptApi.get_transcript(video_id, languages=['en', 'zh-TW'])
        transcript_text = " ".join([seg['text'] for seg in transcript_data])
        return transcript_text
    except Exception as e:
        print(f"無法取得影片 {video_id} 的逐字稿: {e}")
        return ""

# 工具 3：處理單支影片，若無法取得逐字稿則回傳預設訊息（可依需求改用 Whisper 轉錄）
def process_video(video: dict, today_str: str):
    video_id = video.get('id')
    title = video.get('title', 'No Title')
    url = f"https://www.youtube.com/watch?v={video_id}"
    transcript = download_transcript(video_id)
    if not transcript:
        # 可在此處加入呼叫 Whisper 進行音訊轉文字的邏輯
        print(f"影片 {video_id} 沒有提供逐字稿，暫時回傳空白內容")
        transcript = ""
    return {
        "video_id": video_id,
        "title": title,
        "url": url,
        "transcript": transcript,
        "date": today_str
    }

# 工具 4：將影片資訊列表儲存成 CSV 檔案
def save_to_csv(video_info_list: list, output_file: str):
    folder = os.path.dirname(output_file)
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
        print(f"建立資料夾：{folder}")
    keys = ["video_id", "title", "url", "transcript", "date"]
    with open(output_file, "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        for info in video_info_list:
            writer.writerow(info)
    return f"已儲存影片資訊至 {output_file}"

# 用 LangChain 的 Tool 封裝各功能
fetch_video_list_tool = Tool(
    name="FetchVideoList",
    func=fetch_video_list,
    description="從給定的 YouTube 頻道 URL 擷取影片列表。輸入：頻道 URL 字串。"
)

process_video_tool = Tool(
    name="ProcessVideo",
    func=process_video,
    description="處理單支影片並回傳影片資訊字典。輸入：影片資訊字典和今天日期字串。"
)

save_to_csv_tool = Tool(
    name="SaveToCSV",
    func=save_to_csv,
    description="將影片資訊列表儲存成 CSV 檔案。輸入：影片資訊列表和輸出檔案路徑。"
)

# 整合工具與 ChatOllama LLM，建立 agent
tools = [fetch_video_list_tool, process_video_tool, save_to_csv_tool]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

# 主流程：從頻道取得影片列表、逐支處理影片資訊，最後儲存為 CSV
def main_agent(channel_url: str, output_file: str, max_videos: int = 10):
    today_str = datetime.datetime.now().strftime("%Y%m%d")
    videos = fetch_video_list(channel_url)
    video_info_list = []
    count = 0
    for video in videos:
        if count >= max_videos:
            break
        if video is None:
            continue
        info = process_video(video, today_str)
        video_info_list.append(info)
        count += 1
    result = save_to_csv(video_info_list, output_file)
    return result

if __name__ == "__main__":
    # 請依需求修改頻道 URL、CSV 輸出路徑與最大處理影片數
    channel_url = "https://www.youtube.com/@yutinghaofinance/streams"
    output_file = "video/yutinghao_finance_videos.csv"
    max_videos = 10
    result_msg = main_agent(channel_url, output_file, max_videos)
    print(result_msg)

    from langchain.agents import initialize_agent, AgentExecutor

    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        handle_parsing_errors=True,
        verbose=True
    )
    result = agent_executor.run("請從 https://www.youtube.com/@yutinghaofinance/streams 下載最多 10 支影片資訊並儲存成 CSV")
    print(result)



已儲存影片資訊至 video/yutinghao_finance_videos.csv


ValueError: `run` not supported when there is not exactly one output key. Got [].