In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationSummaryBufferMemory

llm = ChatOpenAI(temperature=0.1)

cache_dir = LocalFileStore("../.cache/")

memory = ConversationSummaryBufferMemory(
    llm=llm,
    # max_token_limit=120,
    return_messages=True,
    memory_key="chat_history",
    output_key="answer"
)

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader("../files/chapter_03.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = Chroma.from_documents(docs, cached_embeddings)
# vectorstore = FAISS.from_documents(docs, cached_embeddings)

# chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=vectorstore.as_retriever()
# )

chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    chain_type="stuff",
    verbose=True
)

# chain.run("Aaronson 은 유죄인가요?")
# chain.run("그가 테이블에 어떤 메시지를 썼나요?")
# chain.run("Julia 는 누구인가요?")

questions = [
    "Aaronson 은 유죄인가요?",
    "그가 테이블에 어떤 메시지를 썼나요?",
    "Julia 는 누구인가요?"
]

for question in questions:
    print(f"\n질문: {question}")
    response = chain({"question": question})
    print(f"답변: {response['answer']}")

Created a chunk of size 717, which is longer than the specified 600
Created a chunk of size 608, which is longer than the specified 600
Created a chunk of size 642, which is longer than the specified 600
Created a chunk of size 1444, which is longer than the specified 600
Created a chunk of size 1251, which is longer than the specified 600
Created a chunk of size 1012, which is longer than the specified 600
Created a chunk of size 1493, which is longer than the specified 600
Created a chunk of size 819, which is longer than the specified 600
Created a chunk of size 1458, which is longer than the specified 600
Created a chunk of size 1411, which is longer than the specified 600
Created a chunk of size 742, which is longer than the specified 600
Created a chunk of size 669, which is longer than the specified 600
Created a chunk of size 906, which is longer than the specified 600
Created a chunk of size 703, which is longer than the specified 600
Created a chunk of size 1137, which is lon


질문: Aaronson 은 유죄인가요?


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
'Under the spreading chestnut tree I sold you and you sold me----'
The tears welled up in his eyes. A passing waiter noticed that his glass was empty and came back with the gin bottle.

'As you lie there,' said O'Brien, 'you have often wondered--you have even asked me--why the Ministry of Love should expend so much time and trouble on you. And when you were free you were puzzled by what was essentially the same question. You could grasp the mechanics of the Society you lived in, but not its underlying motives. Do you remember writing in your diary, "I understand HOW: I do not understand WHY"? It was when you thought about "why" that you doubted your