In [None]:
from operator import itemgetter
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough
from langchain.callbacks import StreamingStdOutCallbackHandler

# Chat model used to answer questions. Streaming is enabled and a stdout
# callback handler is attached, so the answer is printed as it is generated.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Conversation memory, exposed under the "history" key that the prompt's
# {history} slot expects. return_messages=False is used because the history
# is interpolated into the system text rather than passed as message objects.
memory = ConversationBufferMemory(memory_key="history", return_messages=False)

# Document splitter and loader.
# NOTE(review): the captured output of this cell reports chunks longer than
# chunk_size (up to 1493 characters). Presumably a line longer than 600
# characters cannot be split further with a single "\n" separator -- confirm,
# and consider a recursive splitter if the size limit matters.
splitter = CharacterTextSplitter(separator="\n", chunk_size=600, chunk_overlap=100)

loader = UnstructuredFileLoader("./files/chapter_three.txt")

# Load the file and split it into chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

# Cache and embed the loaded documents: embeddings are computed with
# OpenAIEmbeddings and backed by a local file store under ./cache/.
# NOTE(review): no cache namespace is passed here; if the embedding model is
# ever changed, confirm that stale vectors cannot be served from this cache.
cache_dir = LocalFileStore("./cache/")
embeddings = OpenAIEmbeddings()
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build the search tooling: a FAISS vector store over the document chunks
# (embedded through the cache-backed embeddings) and a retriever on top of it.
vectorstore = FAISS.from_documents(docs, cache_embeddings)
retriever = vectorstore.as_retriever()

# Build the prompt: the model is told to answer only from the retrieved
# context and to say it does not know rather than invent an answer.
# NOTE(review): "assistance" in the system text is probably a typo for
# "assistant"; it is left unchanged because it is runtime prompt text.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistance. Answer question using only following text. "
            "If you don't know the answer just say you don't know, don't make it up:\n\n"
            "Context:\n{context}\n\n"
            "Conversation so far:\n{history}",
        ),
        ("human", "{question}"),
    ]
)

def load_memory(input):
    """Return the stored conversation history for the prompt's history slot.

    The argument is ignored: the history is read from the module-level
    ``memory`` object, not from the value passed in.
    """
    stored = memory.load_memory_variables({})
    return stored["history"]

# Turn the documents into text: take the content of every document in the
# sequence and stitch them together into one string.
def doc_to_str(docs):
    """Join the ``page_content`` of each document, separated by blank lines."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# The retriever output is piped through doc_to_str so the prompt receives
# plain text. "history" is a function that is simply executed, so it is used
# as is. The question is extracted with itemgetter before it reaches the
# retriever; otherwise the whole input dict would be handed to the retriever,
# which is reportedly what caused the original problem.
chain = (
    {
        "context": itemgetter("question") | retriever | doc_to_str,
        "history": load_memory,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
)

# Logic that runs the chain and stores the conversation turn.
def invoke_chain(question):
    """Run the chain for ``question`` and record the exchange in memory.

    The question is saved under "input" and the content of the chain result
    under "output". Nothing is returned.
    """
    response = chain.invoke({"question": question})
    answer = response.content
    memory.save_context({"input": question}, {"output": answer})

# Ask the first question; invoke_chain runs the chain and saves the turn to memory.
invoke_chain("Is Aaronson guilty?") 

Created a chunk of size 717, which is longer than the specified 600
Created a chunk of size 608, which is longer than the specified 600
Created a chunk of size 642, which is longer than the specified 600
Created a chunk of size 1444, which is longer than the specified 600
Created a chunk of size 1251, which is longer than the specified 600
Created a chunk of size 1012, which is longer than the specified 600
Created a chunk of size 1493, which is longer than the specified 600
Created a chunk of size 819, which is longer than the specified 600
Created a chunk of size 1458, which is longer than the specified 600
Created a chunk of size 1411, which is longer than the specified 600
Created a chunk of size 742, which is longer than the specified 600
Created a chunk of size 669, which is longer than the specified 600
Created a chunk of size 906, which is longer than the specified 600
Created a chunk of size 703, which is longer than the specified 600
Created a chunk of size 1137, which is lon

According to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with, but the narrator had never seen the photograph that disproved their guilt and believed it had never existed.

In [7]:
# Follow-up question; turns already saved in memory are fed into the prompt as history.
invoke_chain("What message did he write on the table?")

He wrote: FREEDOM IS SLAVERY, TWO AND TWO MAKE FIVE, and GOD IS POWER.

In [8]:
# Another question, asked through the same chain and conversation memory.
invoke_chain("Who is Julia?")

Julia is a character who is loved by the narrator. She is mentioned in the context of the narrator's overwhelming feelings for her and his desire to help her.