In [118]:
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import TextLoader, PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

# Chat model shared by the per-document extraction step and the final answer step.
llm = ChatOpenAI(
    temperature=0.5,
)

# Conversation buffer. Its contents are exposed under the "history" key and,
# because return_messages=True, come back as message objects, not a single string.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="history",
)


# Source document that the retrieval index is built from.
loader = TextLoader("./document/chapter_three.txt")

# Splitter whose chunk_size / chunk_overlap are measured with a tiktoken encoder.
# `separators` must be a list of strings; the text is only split on newlines.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=100,
    separators=["\n"],
)
# NOTE: the document is loaded and split exactly once, where the result is
# assigned to `docs`; a bare load_and_split() call here would only repeat that
# work and throw the result away.


# Load the source file and cut it into chunks with the configured splitter.
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embedding model with a file-backed cache stored in ./.cache/.
embeddings = OpenAIEmbeddings()
cache_dir = LocalFileStore("./.cache/")
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embeddings,
    document_embedding_cache=cache_dir,
)

# Build the FAISS index over the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=cache_embeddings,
)
retriever = vectorstore.as_retriever()


# "Map" step prompt: given one chunk of the document ({context}) and the user's
# {question}, the model is asked to return the relevant text verbatim, or ''.
prompt = ChatPromptTemplate.from_messages([
    ("system", """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """),
    ("human", "{question}"),
])

# Runnable that formats the prompt above and sends it to the chat model.
map_doc_chain = prompt | llm


def map_docs(inputs):
    """Run the per-document extraction chain over every retrieved document.

    Args:
        inputs: Mapping with two keys:
            'document' - iterable of retrieved documents; each must expose
                a ``page_content`` attribute.
            'question' - the user's question, passed unchanged to every call.

    Returns:
        The ``content`` of each model response, joined with blank lines, in
        the same order as the input documents. An empty string when no
        documents were retrieved.
    """
    documents = inputs['document']
    question = inputs['question']
    # batch() submits all per-document prompts together and returns the
    # responses in input order, instead of waiting on one call at a time.
    responses = map_doc_chain.batch(
        [{"context": doc.page_content, "question": question} for doc in documents]
    )
    return "\n\n".join(response.content for response in responses)

# Feed the incoming question to the retriever ("document") and pass it through
# untouched ("question"), then hand both to map_docs.
map_chain = {
    "document": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# "Reduce" step prompt: combines the extracted passages ({context}) with the
# prior conversation and the current {question} to produce the final answer.
#
# The conversation history is injected with MessagesPlaceholder rather than
# interpolated into the system text: the memory is configured with
# return_messages=True, so "history" is a list of message objects, and
# formatting that list into a string would insert its Python repr.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer. 추가적으로 한글로 답해주세요.
            ------
            {context}
            """,
        ),
        # Earlier human/AI turns, replayed as real chat messages.
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)


def get_memory(_):
    """Return the stored conversation history from the module-level memory.

    The single positional argument is ignored; it exists so the function can
    be wrapped in a RunnableLambda, which always passes its input along.
    """
    return memory.load_memory_variables({})["history"]

# Full pipeline: build the prompt variables (extracted context, the question
# itself, and the conversation history), format the final prompt, call the model.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
        "history": RunnableLambda(get_memory),
    }
    | final_prompt
    | llm
)


In [119]:
# Ask the chain a question and print the text of the model's reply.
question = "Aaronson은 유죄인가요?"
result = chain.invoke(question)
print(result.content)

# Store this question/answer pair in `memory` so that get_memory() returns it
# on later invocations.
memory.save_context({"question": question}, {"response": result.content})

Aaronson은 그의 혐의에 대해 유죄로 간주되었습니다.


In [120]:
# Follow-up question; it is sent to the chain (and therefore the retriever) as-is.
question = "그가 테이블에 어떤 메시지를 썼나요?"
result = chain.invoke(question)
print(result.content)

# Append this exchange to the conversation memory.
memory.save_context({"question": question}, {"response": result.content})

죄송합니다, 그 정보는 제가 알고 있는 대로에 따르면 없습니다.


In [121]:
# Ask another question through the same chain and print the reply text.
question = "Julia 는 누구인가요?"
result = chain.invoke(question)
print(result.content)

# Append this exchange to the conversation memory.
memory.save_context({"question": question}, {"response": result.content})

Julia는 위의 텍스트에서 언급된 캐릭터로, 주인공인 Winston Smith가 사랑하는 여성입니다. 위의 텍스트에서는 Winston이 감옥에서 살아가는 동안 Julia에 대한 강한 그리움과 사랑을 느낀 장면이 나타납니다.


In [122]:
# Question about the conversation itself; answering it relies on the history
# that earlier cells saved into `memory`.
question = "이전에 어떤 질문을 했나요"
result = chain.invoke(question)
print(result.content)

# Append this exchange to the conversation memory.
memory.save_context({"question": question}, {"response": result.content})

이전 질문은 "Julia 는 누구인가요?" 였습니다.
