In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda


# Chat model shared by the per-chunk extraction step and the final answer step.
llm = ChatOpenAI(
    temperature=0.1,
)

# On-disk byte store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split the source text on newlines; chunk size and overlap are measured with
# the tiktoken encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.md")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model so that vectors cached for one model
# are never returned for the same text embedded with a different model.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

# NOTE: the name keeps its original spelling because later code refers to it.
retriver = vectorstore.as_retriever()


# "Map" step prompt: applied to one document chunk at a time. The system
# message asks the model to return, verbatim, any text in {context} that is
# relevant to the user's {question}.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ----------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Pipe the formatted prompt into the chat model; invoked with a dict carrying
# "context" and "question".
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run the per-chunk extraction chain over each document and merge the results.

    Args:
        inputs: Mapping with a "documents" entry (an iterable of objects that
            expose ``page_content``) and a "question" entry that is forwarded
            unchanged to every chain call.

    Returns:
        The ``content`` of each chain response, in document order, joined by
        blank lines. An empty iterable of documents yields an empty string.
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    extracts = []
    for chunk in retrieved:
        response = map_doc_chain.invoke(
            {"context": chunk.page_content, "question": query}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)


# Build the input for map_docs: the dict keys match what map_docs reads
# ("documents" and "question"). Under LCEL the dict is run as a parallel step,
# so the incoming question goes both to the retriever (producing the
# documents) and through RunnablePassthrough (kept as-is).
map_chain = {
    "documents": retriver,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)


# "Reduce" step prompt: {context} receives the merged extracts and {question}
# the user's original question. The model is told to admit when it does not
# know rather than invent an answer.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
         Given the following extracted parts of a long document and a question, create a final answer.
         If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ----------
        {context}
         """,
        ),
        ("human", "{question}"),
    ]
)


# Full pipeline: the question is fed to map_chain to produce "context" and is
# also passed through unchanged as "question"; both fill final_prompt, whose
# output is sent to the chat model.
chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

# Ask a question about the loaded chapter; the notebook displays the returned message.
chain.invoke("Describe Nick's character")

AIMessage(content='Nick is portrayed as a tolerant and observant individual. He prides himself on his ability to remain non-judgmental and open-minded, but admits that there are limits to his tolerance. He is disillusioned with the world after returning from the East and desires a more uniform and morally upright society. He is particularly intrigued by Gatsby, who stands out from the rest and possesses a unique sensitivity and hopefulness. Nick comes from a prominent family with a tradition of success, and he decides to venture east to pursue a career in the bond business. Overall, Nick is depicted as a thoughtful and introspective character. He is perceptive and notices details about people and his surroundings. He is also polite, considerate, and trustworthy in his interactions with others. He values his friendships and has a level-headed and rational approach to situations.')

In [4]:
# Reuse the same chain with a different question.
chain.invoke("Describe Gatsby's character")

AIMessage(content="Gatsby's character is described as someone who possesses a heightened sensitivity to the promises of life and has a gift for hope and romantic readiness. He is seen as someone extraordinary and gorgeous, with a mysterious and enigmatic nature. Despite the narrator's initial scorn for him, Gatsby is portrayed as someone who turned out alright in the end.")