In [None]:
!pip install langchain langchain-openai langchain-openai langchain_chroma langchain-text-splitters langchain_community langchainhub

In [None]:
import getpass
import os

# Prompt for the key only when it is not already set, so a "Restart & Run All"
# does not block on input when the environment already provides OPENAI_API_KEY.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

from langchain_openai import ChatOpenAI

# Chat model instance used by the chains in this notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

In [None]:
# Step 1 - Load the bodies of three blog posts with WebBaseLoader.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]
loader = WebBaseLoader(urls)
documents = loader.load()

# Step 2 - Split the loaded text into chunks with the recursive text splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(documents)

# Step 3 - Embed the chunks with OpenAI embeddings and store them in Chroma.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Step 4 - Retrieve the chunks related to the user query "agent memory".
# MMR search is used: k=5 results are returned, with fetch_k=50 passed as the
# candidate pool size.
user_query = "agent memory"
mmr_search_kwargs = dict(k=5, fetch_k=50)
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_kwargs,
)
retrieved_docs = retriever.invoke(user_query)


# Step 5 - Ask the LLM whether a retrieved chunk is relevant to the user query.
# The answer is parsed as JSON of the form {"relevance": "yes" | "no"}.
parser = JsonOutputParser()
prompt = PromptTemplate(
    template="""
    User Query: {user_query}
    Retrieved Chunk: {chunk}

    Evaluate the relevance of the retrieved chunk to the user query.
    If relevant, respond with {{"relevance": "yes"}}
    If not relevant, respond with {{"relevance": "no"}}

    {format_instructions}
    """,
    input_variables=["user_query", "chunk"],
    # Pre-filled from the parser; the template must reference
    # {format_instructions} for these instructions to reach the model.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> chat model -> JSON parser
chain = prompt | llm | parser


def generate_answer(content):
    """Summarize ``content`` with the LLM, print the summary and return it.

    Args:
        content: Text to summarize.

    Returns:
        The summary text produced by the model.
    """
    # Imported locally: the notebook's import cell only brings in JsonOutputParser.
    from langchain_core.output_parsers import StrOutputParser

    # The prompt (in Korean) asks for a summary of the content within 100 characters.
    answer_prompt = PromptTemplate(
        template="""
            content: {content}
            주어진 content를 받아서 100자 이내로 요약해줘.
        """,
        input_variables=["content"]
    )
    # The summary is free text, not JSON, so it is parsed as a plain string.
    answer_chain = answer_prompt | llm | StrOutputParser()
    # Keep the chain's output in a local variable and print that value.
    answer = answer_chain.invoke({"content": content})
    print(answer)
    return answer

# Grade each retrieved chunk; summarize the ones the LLM marks as relevant.
for retrieved_doc in retrieved_docs:
    chunk_text = retrieved_doc.page_content
    result = chain.invoke({"user_query": user_query, "chunk": chunk_text})
    if result["relevance"] != "yes":
        print("No Relevance")
        continue
    generate_answer(chunk_text)