In [2]:
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import TextLoader, PyPDFLoader, UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

llm = ChatOpenAI(temperature=0.5)
memory = ConversationBufferMemory(llm=llm, memory_key="history", return_messages=True)


loader = TextLoader("./document/chapter_three.txt")
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, 
    chunk_overlap=100, 
    separators="\n",
) 
loader.load_and_split(text_splitter=splitter)


docs = loader.load_and_split(text_splitter=splitter)
embeddings = OpenAIEmbeddings()

cache_dir = LocalFileStore("./.cache/")
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)
vectorstore = FAISS.from_documents(docs, cache_embeddings) 


retriever = vectorstore.as_retriever()


prompt = ChatPromptTemplate.from_messages(
    [ 
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)
map_doc_chain = prompt | llm


def map_docs(inputs):
    # print(inputs)
    documents = inputs['document']
    question = inputs['question']
    return "\n\n".join(map_doc_chain.invoke({
        "context": doc.page_content,
        "question": question
    }).content for doc in documents)

map_chain = {"document":retriever, "question":RunnablePassthrough()} | RunnableLambda(map_docs)

final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer. 추가적으로 한글로 답해주세요.
            ------
            Here is the conversation history so far:
            {history}
            
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)


def get_memory(_):
    history = memory.load_memory_variables({})["history"]
    return history

chain = {"context":map_chain, "question": RunnablePassthrough(), "history": RunnableLambda(get_memory) } | final_prompt | llm


In [3]:
question = "Aaronson은 유죄인가요?"
result = chain.invoke(question)
print(result.content)

memory.save_context({"question": question}, {"response": result.content})

제가 알기로는 Aaronson이 유죄 판결을 받은 것은 아닌 것으로 알고 있습니다.


In [4]:
question = "그가 테이블에 어떤 메시지를 썼나요?"
result = chain.invoke(question)
print(result.content)

memory.save_context({"question": question}, {"response": result.content})

제가 알기로는 그가 테이블에 어떤 메시지를 쓴 것은 언급되지 않았습니다.


In [5]:
question = "What message did he write in the table?"
result = chain.invoke(question)
print(result.content)

memory.save_context({"question": question}, {"response": result.content})

그가 테이블에 쓴 메시지는 다음과 같습니다:

FREEDOM IS SLAVERY

TWO AND TWO MAKE FIVE

GOD IS POWER

:''

: ''

: ''


In [121]:
question = "Julia 는 누구인가요?"
result = chain.invoke(question)
print(result.content)

memory.save_context({"question": question}, {"response": result.content})

Julia는 위의 텍스트에서 언급된 캐릭터로, 주인공인 Winston Smith가 사랑하는 여성입니다. 위의 텍스트에서는 Winston이 감옥에서 살아가는 동안 Julia에 대한 강한 그리움과 사랑을 느낀 장면이 나타납니다.


## 답

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory

llm = ChatOpenAI(
    temperature=0.1,
)

  llm = ChatOpenAI(


In [None]:
cache_dir = LocalFileStore("./.cache/")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = TextLoader("./document/chapter_three.txt")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

In [4]:
memory = ConversationBufferMemory(
    llm=llm,
    max_token_limit=20,
    return_messages=True,
)
def load_memory(_):
    return memory.load_memory_variables({})["history"]


retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

  memory = ConversationBufferMemory(


In [5]:
chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
        "history": load_memory,
    }
    | prompt
    | llm
)


def invoke_chain(question):
    result = chain.invoke(question)
    print(result)
    memory.save_context({"input": question}, {"output": result.content})

In [7]:
invoke_chain("Aaronson은 유죄인가요?")

content='예, Aaronson은 유죄로 여겨졌습니다.' additional_kwargs={} response_metadata={'token_usage': <OpenAIObject at 0x1175a0e50> JSON: {
  "prompt_tokens": 2586,
  "completion_tokens": 21,
  "total_tokens": 2607,
  "prompt_tokens_details": {
    "cached_tokens": 0,
    "audio_tokens": 0
  },
  "completion_tokens_details": {
    "reasoning_tokens": 0,
    "audio_tokens": 0,
    "accepted_prediction_tokens": 0,
    "rejected_prediction_tokens": 0
  }
}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-bd4d71ae-26aa-435f-b772-e2e06e7828f6-0'


In [10]:
invoke_chain("What message did he write in the table?")


content='He wrote "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.' additional_kwargs={} response_metadata={'token_usage': <OpenAIObject at 0x117555d50> JSON: {
  "prompt_tokens": 2664,
  "completion_tokens": 27,
  "total_tokens": 2691,
  "prompt_tokens_details": {
    "cached_tokens": 0,
    "audio_tokens": 0
  },
  "completion_tokens_details": {
    "reasoning_tokens": 0,
    "audio_tokens": 0,
    "accepted_prediction_tokens": 0,
    "rejected_prediction_tokens": 0
  }
}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-3b01972d-72c2-4ec2-944e-643fe561411f-0'


In [None]:
invoke_chain("그가 테이블에 어떤 메시지를 썼나요?")
