In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory
import time

llm = ChatOpenAI(
    temperature=0.1,
    model="gpt-4o-mini-2024-07-18",
)

memory = ConversationBufferMemory(
    llm=llm,
    memory_key="history",
    return_messages=True,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_three.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embedding = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embedding)

retriever = vectorstore.as_retriever()

map_docs_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

map_docs_chain = map_docs_prompt | llm


def get_history(_):
   return memory.load_memory_variables({})["history"]


def map_docs(input):
    documents = input["documents"]
    question = input["question"]
    results = []
    for document in documents:
        result = map_docs_chain.invoke(
            {"context": document.page_content, "question": question}
        ).content
        results.append(result)
        time.sleep(20)
    results = "\n\n".join(results)
    return results


map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    Given the following extracted parts of a long document and a question, create a final answer.
    if you don't know the answer, just say that you don't know. Don't try to make up an answer.
    ------
    {context}
    """,
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

chain = (
    {"context": map_chain, "question": RunnablePassthrough(), "history": get_history}
    | final_prompt
    | llm
)

def invoke_chain(question):
    result = chain.invoke(question).content
    memory.save_context(
        {"input": question},
        {"output": result},
    )
    print(result)

In [6]:
print(invoke_chain("Is Aaronson guilty?"))

The provided text does not contain enough information to determine if Aaronson is guilty. It mentions that he, along with Jones and Rutherford, were charged with crimes, but it also states that the photograph that disproved their guilt had never existed and was invented. Therefore, I cannot definitively say if Aaronson is guilty or not based on the information given.
None


In [3]:
print(invoke_chain("What message did he write on the table?"))

He wrote the following messages on the table:
1. FREEDOM IS SLAVERY
2. TWO AND TWO MAKE FIVE
3. GOD IS POWER
None


In [4]:
print(invoke_chain("Who is Julia?"))

Julia is a character who is deeply loved by the narrator. The narrator experiences a powerful hallucination of her presence and reflects on his love for her, which he realizes is even stronger than when they were together and free. He has a sense that she is still alive and in need of his help. However, the text does not provide specific details about her background or identity.
None
