In [None]:
!pip install langchain-tavily
!pip install langchain
!pip install langchain_community
!pip install langchain_core
!pip install langchain_huggingface
!pip install rank-bm25
!pip install chromadb
!pip install python-dotenv
!pip install pypdf
!pip install pymupdf
!pip install langchain_openai

Collecting langchain-tavily
  Downloading langchain_tavily-0.1.6-py3-none-any.whl.metadata (11 kB)
Collecting mypy<2.0.0,>=1.15.0 (from langchain-tavily)
  Downloading mypy-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting mypy_extensions>=1.0.0 (from mypy<2.0.0,>=1.15.0->langchain-tavily)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_tavily-0.1.6-py3-none-any.whl (14 kB)
Downloading mypy-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (12.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading mypy_extensions-1.1.0-py3-none-any.whl (5.0 kB)
Installing collected packages: mypy_extensions, mypy, langchain-tavily


In [None]:
import os

# Option 1: running on Google Colab

## Mount Google Drive so the project folder is reachable from the runtime
from google.colab import drive
drive.mount('/content/drive')
base_path = '/content/drive/MyDrive/Projects'

## Locations of the source PDF and the .env file, both under base_path
doc_path = base_path + '/rag최적화실험/documents/상해보험약관.pdf'
env = base_path + '/comm/.env'

In [None]:
# Option 2: running locally.
# Kept as comments rather than a bare string literal, which the cell would
# otherwise echo as its output. To use it, uncomment the assignments below and
# skip the Colab cell.
#
# base_path = './'
#
# ## Document and .env locations
# doc_path = f'{base_path}/documents/상해보험약관.pdf'
# env = f'{base_path}/../../comm/.env'

In [None]:
# Environment setup: load variables from the .env file whose path is held in `env`
# (`env` is assigned by the path-configuration cell above).
from dotenv import load_dotenv
load_dotenv(env)

In [None]:
# 문서 로드하기
from langchain_community.document_loaders import PyMuPDFLoader

def load_doc_file():
  """Load the PDF located at the module-level ``doc_path``.

  Returns:
    The result of ``PyMuPDFLoader(doc_path).load()``.
  """
  return PyMuPDFLoader(doc_path).load()

In [None]:
# 임베딩 모델 가져오기
from langchain_huggingface import HuggingFaceEmbeddings

def get_embeddings(device=None):
  """Build the BAAI/bge-m3 HuggingFace embedding model with normalized embeddings.

  Args:
    device: Device string forwarded to the model via ``model_kwargs``
      (e.g. "cuda" or "cpu"). When omitted, "cuda" is chosen if a GPU is
      available and "cpu" otherwise, so the notebook also runs on CPU-only
      runtimes instead of failing at model load.

  Returns:
    A configured ``HuggingFaceEmbeddings`` instance.
  """
  if device is None:
    import torch  # local import: only needed for device detection
    device = "cuda" if torch.cuda.is_available() else "cpu"
  return HuggingFaceEmbeddings(
      model_name="BAAI/bge-m3",
      model_kwargs={"device": device},
      encode_kwargs={"normalize_embeddings": True},
  )

In [None]:
# 리트리버 가져오기
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma

def get_retriever(embed_model, docs):
  """Build a hybrid retriever that combines BM25 with a Chroma vector store.

  Args:
    embed_model: Embedding model passed to ``Chroma.from_documents``.
    docs: Documents indexed by both the BM25 and the Chroma retriever.

  Returns:
    An ``EnsembleRetriever`` weighting BM25 at 0.7 and Chroma at 0.3.
  """
  bm25 = BM25Retriever.from_documents(docs)
  # search_type is an option of the vector-store retriever. EnsembleRetriever
  # declares no such field, so MMR has to be requested here to take effect.
  vector_store = Chroma.from_documents(documents=docs, embedding=embed_model)
  chroma = vector_store.as_retriever(search_type="mmr")
  return EnsembleRetriever(retrievers=[bm25, chroma], weights=[0.7, 0.3])

In [None]:
# 서치 검색기 가져오기
from langchain_tavily import TavilySearch

def get_search_ins():
  """Create the Tavily web-search tool.

  The result cap is read from the module-level ``top_k`` at call time, and the
  topic is fixed to "general".
  """
  return TavilySearch(max_results=top_k, topic="general")

In [None]:
from langchain_openai import ChatOpenAI

# Basic setup: build the shared objects that the tool and agent cells read.
# Order matters: top_k must exist before get_search_ins() is called.
top_k = 3  # result count: Tavily max_results and the slice applied in run_retriever
docs = load_doc_file()
embed_model = get_embeddings()
# NOTE(review): "retreiver" looks like a typo of "retriever"; run_retriever reads
# this exact name, so both must be renamed together.
retreiver = get_retriever(embed_model, docs)
search_ins = get_search_ins()
llm_model = ChatOpenAI(temperature=0, model="gpt-4o")

In [None]:
# tools 정의

## Fetch RAG results
def run_retriever(query):
  """Query the module-level ensemble retriever and keep the first ``top_k`` hits."""
  hits = retreiver.invoke(query)
  return hits[:top_k]

## Fetch web-search results
def run_search(query):
  """Send ``query`` to the module-level Tavily search tool and return its response."""
  payload = {"query": query}
  return search_ins.invoke(payload)

# Tool is imported here because this cell runs before the agent cell, which is
# the only other place that imports it; without this line a fresh
# Restart & Run All fails with NameError.
from langchain.agents import Tool

# The descriptions are what the agent reads when choosing a tool:
# retriever_tool for insurance questions, search_tool for general questions.
tools = [
    Tool(name="retriever_tool", func=run_retriever, description="보험관련 질문시에 사용"),
    Tool(name="search_tool", func=run_search, description="일반적인 질문시에 사용")
]

In [None]:
# Agent 생성
from langchain.agents import ZeroShotAgent, AgentExecutor, Tool
from langchain.prompts import PromptTemplate

## 1. Prompt pieces
# ZeroShotAgent.from_llm_and_tools takes no `prompt` argument: it assembles its
# own template as prefix + tool descriptions + format_instructions + suffix.
# The custom text is therefore supplied through those three arguments. The
# agent renders the tool list itself, so no {tools} placeholder is needed.
prefix = """당신은 다양한 도구를 사용할 수 있는 유능한 AI Agent입니다.
사용자의 질문에 대해 도구를 적절히 선택해서 행동하거나, 직접 답변을 제공해야 합니다.
답변은 한국어로 해주세요.

도구 리스트:"""

# Output format the agent must follow; it matches the Action / Action Input /
# Final Answer markers that the agent's output parser looks for.
format_instructions = """항상 다음 중 하나의 형식을 따라야 합니다:

- 도구가 필요할 때:
Thought: [Your Reasoning]
Action: [Tool Name]
Action Input: [Input Query]

- 바로 답변 가능할 때:
Final Answer: [최종 답변]"""

# {query} is the single user input; {agent_scratchpad} carries prior steps.
suffix = """질문: {query}
{agent_scratchpad}"""

## 2. Create the agent
agent = ZeroShotAgent.from_llm_and_tools(
    llm=llm_model,
    tools=tools,
    prefix=prefix,
    suffix=suffix,
    format_instructions=format_instructions,
    input_variables=["query", "agent_scratchpad"],
)

# Keep `prompt` available for inspection; it is the template the agent really uses.
prompt = agent.llm_chain.prompt

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,  # feed malformed LLM output back instead of raising
    verbose=True,
)


In [None]:
# 1. General-knowledge question -> the agent is expected to choose search_tool
answer = agent_executor.run("미국의 수도는 어디인가요?")

In [None]:
# 2. Insurance-related question -> the agent is expected to choose retriever_tool
answer = agent_executor.run("보험 청약을 철회하고 싶어요.")