In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.memory import ConversationBufferMemory


"""
Stuff Documents 체인을 사용하여 완전한 RAG 파이프라인을 구현하세요.
체인을 수동으로 구현해야 합니다.
체인에 ConversationBufferMemory를 부여합니다.
이 문서를 사용하여 RAG를 수행하세요: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223
체인에 다음 질문을 합니다:
Aaronson 은 유죄인가요?
그가 테이블에 어떤 메시지를 썼나요?
Julia 는 누구인가요?
"""

# Chat model shared by the chain defined further down.
llm = ChatOpenAI(
    temperature=0.1,
)

# 1. Document loading
loader = TextLoader("./files/1984_gist.txt", encoding='utf-8')

# 2. Document splitting: newline-separated chunks sized with the tiktoken
#    encoder, with an overlap between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

docs = loader.load_and_split(text_splitter=splitter)

# 3. Embeddings backed by an on-disk cache so repeated runs can reuse the
#    vectors stored under ./.cache/.
cache_dir = LocalFileStore("./.cache/")

embeddings = OpenAIEmbeddings()

# The namespace ties cache keys to the embedding model, so vectors produced
# by a different model are never read back from the same store.
# NOTE: entries cached earlier without a namespace are not reused.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir, namespace=embeddings.model
)

# 4. Vector store and retriever built from the split documents.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

# 5. Conversation memory; the default memory key is read back as "history"
#    by load_memory below.
memory = ConversationBufferMemory(return_messages=True)

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line, or an empty
        string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

def load_memory(_):
    """Return the conversation history stored in the module-level ``memory``.

    The single positional argument is ignored; it exists so the function can
    be plugged into a runnable that passes the chain input along.

    Returns:
        The value stored under the ``"history"`` key of the memory variables.
    """
    variables = memory.load_memory_variables({})
    return variables["history"]

# Prompt layout: system instructions carrying the retrieved context, then the
# prior conversation turns, then the user's current question.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the "
            "following context. If you don't know the answer just say you "
            "don't know, don't make it up:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

# "Stuff" chain: the input dict is extended with the formatted retrieved
# documents ("context") and the stored conversation ("history"), then piped
# through the prompt into the chat model.
chain = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(retriever.invoke(inputs["question"])),
        history=load_memory,
    )
    | prompt
    | llm
)

def invoke_chain(question):
    """Ask the chain one question, remember the exchange, and print it.

    Args:
        question: The user question passed to the chain under ``"question"``.

    Returns:
        None. The question and answer are written to stdout and the exchange
        is saved into the module-level ``memory``.
    """
    answer = chain.invoke({"question": question}).content
    # Store the turn so later questions can refer back to it.
    memory.save_context({"input": question}, {"output": answer})
    print(f"Question: {question}", f"Answer: {answer}\n", sep="\n")

# Ask the three assignment questions in order; the second one relies on the
# conversation memory to know who "he" refers to.
for question in (
    "Aaronson 은 유죄인가요?",
    "그가 테이블에 어떤 메시지를 썼나요?",
    "Julia 는 누구인가요?",
):
    invoke_chain(question)

In [17]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory


"""
Stuff Documents 체인을 사용하여 완전한 RAG 파이프라인을 구현하세요.
체인을 수동으로 구현해야 합니다.
체인에 ConversationBufferMemory를 부여합니다.
이 문서를 사용하여 RAG를 수행하세요: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223
체인에 다음 질문을 합니다:
Aaronson 은 유죄인가요?
그가 테이블에 어떤 메시지를 썼나요?
Julia 는 누구인가요?
"""

# Chat model shared by the map and final chains defined further down.
llm = ChatOpenAI(
    temperature=0.1,
)

# 1. Document loading
loader = TextLoader("./files/1984_gist.txt", encoding='utf-8')


# 2. Document splitting (CharacterTextSplitter): newline-separated chunks
#    sized with the tiktoken encoder, with an overlap between neighbours.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
docs = loader.load_and_split(text_splitter=splitter)

# 3. Embedding creation and caching (OpenAIEmbeddings, CacheBackedEmbeddings)
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
# The namespace ties cache keys to the embedding model, so vectors produced
# by a different model are never read back from the same store.
# NOTE: entries cached earlier without a namespace are not reused.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir, namespace=embeddings.model
)

# 4. Vector store creation (FAISS)
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# 5. Conversation memory (ConversationBufferMemory)
# ConversationBufferMemory stores the whole history verbatim; it has no
# `llm` or `max_token_limit` options (those belong to the summary/token
# buffer memories), so only the settings it actually uses are passed.
# `memory_key` must stay "chat_history" because load_memory reads that key.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

def format_docs(docs):
    """Join the text of every document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        All ``page_content`` values separated by a blank line; an empty
        string when there are no documents.
    """
    pieces = []
    for doc in docs:
        pieces.append(doc.page_content)
    return "\n\n".join(pieces)

def load_memory(_):
    """Return the conversation history held by the module-level ``memory``.

    The positional argument is ignored; it only lets the function be used
    where a runnable passes the chain input along.

    Returns:
        The value stored under the ``"chat_history"`` key of the memory
        variables.
    """
    stored = memory.load_memory_variables({})
    return stored["chat_history"]

# 6. Chain wiring
# Map step: each retrieved document is shown to the model on its own and the
# model is asked to copy out only the text relevant to the question.
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """),
    ("human", "{question}"),
])
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the per-document extraction chain over every retrieved document.

    Args:
        inputs: Mapping with a ``"documents"`` entry (iterable of objects
            exposing ``page_content``) and a ``"question"`` entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined by a blank line.
    """
    documents, question = inputs["documents"], inputs["question"]
    extracts = []
    for doc in documents:
        response = map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        )
        extracts.append(response.content)
    return "\n\n".join(extracts)

# The chain input is handed to the retriever (to fetch documents) and also
# forwarded unchanged as the question; map_docs then condenses the documents.
map_chain = dict(
    documents=retriever,
    question=RunnablePassthrough(),
) | RunnableLambda(map_docs)

# Final step prompt: answers the question from the extracts produced by the
# map step, which are substituted for {context}.
final_prompt = ChatPromptTemplate.from_messages([
    ("system", """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """),
    ("human", "{question}"),
])

# Final chain: the map step supplies {context}, the raw input supplies
# {question}. final_prompt has no other variables, so the previously passed
# "extra" entry was never consumed and has been dropped.
# NOTE(review): `memory` / `load_memory` defined earlier in this cell are not
# wired into this chain, so it does not see any conversation history.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

chain.invoke("Describe Victory Mansions")

AIMessage(content='Victory Mansions is a dilapidated and run-down apartment building in a dystopian society, as depicted in George Orwell\'s novel "1984." It is characterized by poor living conditions, lack of amenities, and an oppressive atmosphere. The building symbolizes decay and deprivation in the society portrayed in the novel. The protagonist, Winston Smith, resides in Victory Mansions, where he has his own corner table always reserved for him due to others avoiding sitting too close to him. Despite its shabby appearance and foul smell, Winston finds a sense of routine and familiarity in this place, with the chessboard always waiting for him and the staff knowing his habits without being told.')