# In [12]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the per-document chain and the final-answer chain.
llm = ChatOpenAI(temperature=0.1)

# File-backed byte store rooted at ./.cache/; used below to cache embeddings.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated splitter whose chunk_size / chunk_overlap are measured with
# the tiktoken encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Load the source document and split it into chunks with the splitter above.
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)
# Wrap the OpenAI embeddings with the file store so computed vectors are cached
# (presumably reused on later runs instead of being recomputed -- confirm).
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build a FAISS index over the chunks using the cache-backed embeddings.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# The retriever gives us the list of documents for a query.
retriever = vectorstore.as_retriever()


# list of docs
# for response in list of LLM responses | put them all together
def map_docs(input):
    """Run the per-document chain on each document and merge the replies.

    Args:
        input: Mapping with a "documents" entry (iterable of objects exposing
            ``page_content``) and a "question" entry, which is forwarded
            unchanged to every per-document call.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document order,
        joined with a blank line between entries.
    """
    retrieved = input["documents"]
    query = input["question"]

    extracts = []
    for document in retrieved:
        # One LLM call per document; only the text of the reply is kept.
        reply = map_doc_chain.invoke(
            {"context": document.page_content, "question": query}
        )
        extracts.append(reply.content)

    return "\n\n".join(extracts)


# Map step: the chain input is sent to the retriever (result stored under
# "documents") and also passed through unchanged (under "question"); the
# resulting dict is then handed to map_docs.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# for doc in list:
# 	doc | prompt | llm

# Prompt for the map step: given one chunk ({context}) and the user's
# {question}, ask the model to quote the relevant text verbatim.
# The fallback instruction previously ended at "return :" with nothing after
# it, leaving the model without guidance for irrelevant chunks; the empty
# string literal is restored so the sentence is complete.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Per-document chain: fill the prompt, then call the chat model.
map_doc_chain = map_doc_prompt | llm


# final doc | prompt | llm
# Prompt for the reduce step: {context} receives the output of map_chain (the
# joined per-document extracts) and {question} the original user question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: the input question feeds map_chain (to build "context") and is
# also passed through as "question"; both fill final_prompt, which goes to llm.
chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

# Runs the whole chain at module level with a sample question.
chain.invoke("Describe Victory Mansions.")
# chain.invoke("Where does Winston go to work?")
# >>>AIMessage(content='Winston goes to work at the Ministry of Truth.')

# >>>AIMessage(content='Victory Mansions is a building with glass doors that let in gritty dust, a hallway that smells of boiled cabbage and old rag mats, and a large colored poster of an enormous face on the wall. The building has a faulty lift that is rarely working, and the electricity is cut off during daylight hours as part of an economy drive in preparation for Hate Week. The flat in Victory Mansions is located seven flights up and is described as having a telescreen on the wall that cannot be completely shut off. The building is also depicted as having a bleak and oppressive atmosphere, with the poster of the enormous face bearing the caption "BIG BROTHER IS WATCHING YOU."')