Perplexity 에이전트 시스템

In [10]:
# !pip install langchain_chroma pytube youtube-search youtbue-transcript-api next_asyncio wolframalpha arxiv

In [11]:
from dotenv import load_dotenv
load_dotenv()

True

WolframAlpha API 활용을 위한 비동기 설정
- async 호출만 가능하므로 nest_asyncio이용

In [12]:
import asyncio
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())

import nest_asyncio
nest_asyncio.apply()

In [13]:
from langchain_community.utilities.wolfram_alpha import WolframAlphaAPIWrapper
wolfram = WolframAlphaAPIWrapper()

In [14]:
from typing import Annotated, Literal
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages

class State(TypedDict):
    messages: Annotated[list, add_messages]
    focus: Literal["web", "academic", "video", "math"]

웹 검색, 눈문 검색, 수학 계산 도구 정의

In [15]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import YouTubeSearchTool
from langchain_community.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode
import re

web_tool = TavilySearchResults(max_results=3)

@tool
def academic_tool(query:str):
    """
    academic paper search tool
    """
    arxiv = ArxivAPIWrapper()
    docs = arxiv.run(query)
    return docs

@tool
def math_tool(query:str):
    """
    math tool
    """
    wolfram = WolframAlphaAPIWrapper()
    result = wolfram.run(query)
    return result

In [16]:
# Test
academic_tool.run("DeepSeek Principle")

"Published: 2024-12-13\nTitle: DeepSeek-VL2: Mixture-of-Experts Vision-Language Models for Advanced Multimodal Understanding\nAuthors: Zhiyu Wu, Xiaokang Chen, Zizheng Pan, Xingchao Liu, Wen Liu, Damai Dai, Huazuo Gao, Yiyang Ma, Chengyue Wu, Bingxuan Wang, Zhenda Xie, Yu Wu, Kai Hu, Jiawei Wang, Yaofeng Sun, Yukun Li, Yishi Piao, Kang Guan, Aixin Liu, Xin Xie, Yuxiang You, Kai Dong, Xingkai Yu, Haowei Zhang, Liang Zhao, Yisong Wang, Chong Ruan\nSummary: We present DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE)\nVision-Language Models that significantly improves upon its predecessor,\nDeepSeek-VL, through two key major upgrades. For the vision component, we\nincorporate a dynamic tiling vision encoding strategy designed for processing\nhigh-resolution images with different aspect ratios. For the language\ncomponent, we leverage DeepSeekMoE models with the Multi-head Latent Attention\nmechanism, which compresses Key-Value cache into latent vectors, to enable\nefficie

In [17]:
# Test
math_tool.run("123*123^2")

'Assumption: 123×123^2 \nAnswer: 1860867'

In [23]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.tools import YouTubeSearchTool
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.documents import Document
import ast

youtube_search_tool = YouTubeSearchTool()

@tool
def video_tool(query: str) -> str:
    """
    Retriever tool for the transcript for a YouTube video.
    If user want to find some information, this tool is good to gather youtube video information.
    query should be given in string format.
    """
    urls = youtube_search_tool.run(query)
    urls = ast.literal_eval(urls)
    docs = []
    for url in urls:
        loader = YoutubeLoader.from_youtube_url(
            url, add_video_info=True, language=["en", "ko"]
        )
        scripts = loader.load()
        script_content = scripts[0].page_content
        # title = scripts[0].metadata['title']
        # author = scripts[0].metadata['author']
        author = "Unknown"
        title = "Unknown"
        doc = Document(page_content=script_content, metadata={"source": url, "title": title, "author":author})
        docs.append(doc)

    text_splitter = RecursiveCharacterTextSplitter(
        separators = ["\n\n", "\n", ".", ",", " ", ""],
        chunk_size = 1000, chunk_overlap=0
    )
    texts = text_splitter.split_documents(docs)
    embeddings = OpenAIEmbeddings()
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    retrieved_docs = retriever.invoke(query)

    video_results = []
    for doc in retrieved_docs:
        title = doc.metadata.get('title', 'No title available')
        author = doc.metadata.get('author', 'No author available')
        script_content = doc.page_content

        video_info = f"""
        Video Information:
        ------------------
        Title: {title}
        Author: {author}
        Transcript: {script_content}
        ------------------
        """
        video_results.append(video_info)
    
    return "\n\n".join(video_results)

In [26]:
video_tool.run("IU가 누구야?")

PytubeError: Exception while accessing title of https://youtube.com/watch?v=rqAE9OiK4CY. Please file a bug report at https://github.com/pytube/pytube

In [None]:
from pytube import YouTube
yt = YouTube(f"https://www.youtube.com/watch?v=rqAE9OiK4CY")
# yt = YouTube(f"https://www.youtube.com/watch?v=pDvBiB1waBk")
print(yt.vid_info)
# video_info = {
#     "title": yt.title or "Unknown",
#     "description": yt.description or "Unknown",
#     "view_count": yt.views or 0,
#     "thumbnail_url": yt.thumbnail_url or "Unknown",
#     "publish_date": yt.publish_date.strftime("%Y-%m-%d %H:%M:%S")
#     if yt.publish_date
#     else "Unknown",
#     "length": yt.length or 0,
#     "author": yt.author or "Unknown",
# }
# print(video_info)

{'responseContext': {'visitorData': 'CgttTzBkOWdpODJ0TSjd6qK9BjIKCgJLUhIEGgAgTA%3D%3D', 'serviceTrackingParams': [{'service': 'GFEEDBACK', 'params': [{'key': 'ipcc', 'value': '0'}, {'key': 'is_viewed_live', 'value': 'False'}, {'key': 'is_alc_surface', 'value': 'false'}, {'key': 'logged_in', 'value': '0'}, {'key': 'e', 'value': '9406121,24004644,24077241,24078649,24173287,24181174,24241378,24290153,24548629,24552800,24556101,39328397,51020570,51025415,51037346,51037349,51043774,51050361,51053689,51065188,51089007,51111738,51115184,51124104,51152050,51157411,51176511,51178316,51178327,51178348,51178351,51183909,51217504,51220164,51225393,51227037,51228850,51237842,51242448,51248734,51255676,51256074,51256084,51274583,51276557,51276565,51281227,51285717,51287196,51292055,51296439,51298020,51299710,51299724,51303667,51303669,51303789,51304155,51305494,51305839,51310742,51311031,51311034,51313109,51313767,51313802,51318845,51322669,51326932,51327146,51327165,51327186,51330475,51331481,51331

In [None]:

@tool
def youtube_tool(query:str):
    """
    youtube tool
    """
    yt = YouTubeSearchTool()
    result = yt.run(query)
    return result