In [32]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory


"""
Stuff Documents 체인을 사용하여 완전한 RAG 파이프라인을 구현하세요.
체인을 수동으로 구현해야 합니다.
체인에 ConversationBufferMemory를 부여합니다.
이 문서를 사용하여 RAG를 수행하세요: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223
체인에 다음 질문을 합니다:
Aaronson 은 유죄인가요?
그가 테이블에 어떤 메시지를 썼나요?
Julia 는 누구인가요?
"""

# Chat model instance shared by the per-document map step and the final
# answer step; created with a low sampling temperature (0.1).
llm = ChatOpenAI(temperature=0.1)

# 1. Document loading: the source text is read from disk as UTF-8.
loader = TextLoader("./files/1984_gist.txt", encoding="utf-8")

# 2. Document splitting: split on newlines, with chunk sizes measured by the
#    tiktoken encoder (chunk_size=600, chunk_overlap=100).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)
# Load the raw document(s) first, then hand them to the splitter.
docs = splitter.split_documents(loader.load())

# 3. Embeddings with a cache: OpenAI embeddings are wrapped so that computed
#    vectors are stored in a local file store rooted at ./.cache/.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# 4. Vector store: index the split documents in FAISS and expose the index
#    as a retriever for the chains below.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# 5. Conversation memory used to carry earlier turns into later questions.
#    The history is exposed under the "chat_history" key as message objects
#    (return_messages=True) so it can fill a MessagesPlaceholder.
#    NOTE: `llm` and `max_token_limit` are options of
#    ConversationSummaryBufferMemory, not of ConversationBufferMemory, so they
#    are intentionally not passed here; passing them had no effect and only
#    suggested a token cap that was never applied.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The contents in iteration order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return "\n\n".join(contents)

def load_memory(_):
    """Return the current ``chat_history`` value from the module-level memory.

    Args:
        _: Ignored. The parameter exists so the function can be wrapped in a
            ``RunnableLambda``, which calls it with the chain input.

    Returns:
        Whatever ``memory.load_memory_variables({})`` stores under the
        ``"chat_history"`` key.
    """
    variables = memory.load_memory_variables({})
    return variables["chat_history"]

# 6. Chain wiring.
# Map step: one document portion ({context}) and the user's {question} are
# placed in this prompt, which asks the model to return the relevant text
# verbatim (or '' when nothing is relevant).
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
    ),
    ("human", "{question}"),
])
# Prompt piped into the shared chat model.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map chain over each document and join the replies.

    Args:
        inputs: Mapping with a ``"documents"`` entry (iterable of objects with
            a ``page_content`` attribute) and a ``"question"`` entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` reply, in document order,
        separated by a blank line.
    """
    documents = inputs["documents"]
    question = inputs["question"]

    extracts = []
    for doc in documents:
        # One model call per document, made sequentially.
        reply = map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        )
        extracts.append(reply.content)
    return "\n\n".join(extracts)

# Retrieval + map: the chain input (the question) goes to the retriever to
# produce "documents" and is also passed through unchanged as "question";
# map_docs then consumes that mapping.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# Final-answer prompt: system instructions plus the extracted {context},
# followed by the stored chat history and then the user's {question}.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            please answer in Korean.
            ------
            {context}
            """,
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# Full pipeline. The input question fans out into three prompt variables:
#   context      - produced by map_chain (retrieval + per-document extraction)
#   question     - the question, passed through unchanged
#   chat_history - loaded from the conversation memory
# The filled prompt is then sent to the chat model.
# NOTE(review): the task description at the top of the file asks for a Stuff
# Documents chain, while this wiring extracts per document and then combines;
# confirm which behaviour is intended.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
        "chat_history": RunnableLambda(load_memory),
    }
    | final_prompt
    | llm
)

def invoke_chain(question):
    """Ask one question, print the exchange, and record it in memory.

    Args:
        question: The question text passed to ``chain.invoke``.

    Returns:
        None. The question and answer are printed, and the pair is saved to
        the module-level ``memory`` so later calls receive it as history.
    """
    print(f"Question: {question}")
    answer = chain.invoke(question).content
    # Save the turn before printing the answer, as the history feeds the
    # next invocation.
    memory.save_context({"input": question}, {"answer": answer})
    print(f"Answer: {answer}\n")


# Ask the three questions in order; each call saves its turn to memory, so
# later questions are sent together with the earlier exchanges.
for question in (
    "Is Aaronson guilty?",
    "What message did he write in the table?",
    "Who is Julia?",
):
    invoke_chain(question)

Question: Is Aaronson guilty?
Answer: 예, Aaronson은 그가 기소된 범죄로 유죄입니다.

Question: What message did he write in the table?
Answer: 그는 다음과 같은 메시지를 테이블에 썼습니다: FREEDOM IS SLAVERY, TWO AND TWO MAKE FIVE, GOD IS POWER.

Question: Who is Julia?
Answer: Julia는 이 문서에서 언급된 캐릭터 중 하나로, 주인공인 Winston Smith와 사랑을 나누는 여성입니다.

