In [3]:
from langchain.embeddings import CacheBackedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# Chat model shared by the per-document chain and the final answer chain.
llm = ChatOpenAI(temperature=0.1)
# On-disk byte store rooted at ".cache/"; it backs the embedding cache below.
cache_dir = LocalFileStore(".cache/")
# Splitter that breaks text on newlines, built via the tiktoken-encoder
# constructor.
# NOTE(review): chunk_size/chunk_overlap are presumably counted in tokens
# rather than characters with this constructor — confirm against the
# text-splitter documentation.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator='\n',
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader('./files/chapter1.txt')
# Load the source file and cut it into chunks with the splitter above.
docs = loader.load_and_split(text_splitter=splitter)
embeddings = OpenAIEmbeddings()
# Wrap the embedder so its results are stored in `cache_dir`.
# NOTE(review): no `namespace` argument is passed here; if the cache
# directory is ever shared between different embedding models, check
# whether a namespace is needed to keep their entries apart.
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)
# Build the vector index from the chunks using the cache-backed embedder,
# then expose it as a retriever for use in the chains below.
vector_store = Chroma.from_documents(docs, cache_embeddings)
retriever = vector_store.as_retriever()

# Map step: for each doc in the list of docs, run doc | prompt | llm.
# Reduce step: for each response in the list of LLM responses, put them all together.

# Prompt for the final answering step. `{context}` is filled with the
# joined text produced by the per-document step and `{question}` with the
# question given to the chain; the system message tells the model to admit
# when it does not know the answer.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ----------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Prompt for the per-document step. `{context}` is filled with one
# document's text and `{question}` with the question; the model is asked to
# return any relevant text verbatim.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
        Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
        ----------
        {context}
        """,
        ),
        ("human", "{question}"),
    ]
)
# Pipe the per-document prompt into the chat model.
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run the per-document chain over every document and merge the results.

    Args:
        inputs: Mapping with two keys: ``"documents"``, an iterable of
            objects exposing ``page_content``, and ``"question"``, the value
            forwarded unchanged to each per-document prompt.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined by blank lines. An empty string when there are no
        documents.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    # One prompt payload per document.
    payloads = [
        {
            "context": doc.page_content,
            "question": question,
        }
        for doc in documents
    ]
    # The per-document calls are independent of each other, so hand them to
    # batch() instead of invoking one at a time: batch() runs them
    # concurrently and returns the responses in the same order as the inputs.
    responses = map_doc_chain.batch(payloads)
    # `content` is typed as str-or-list by the message class, which the type
    # checker rejects for join(); hence the ignore.
    return "\n\n".join(response.content for response in responses)  # type: ignore


# Per-document stage: the chain input goes to the retriever (yielding
# "documents") and is also passed through unchanged as "question"; the
# resulting dict is then handed to map_docs.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# Final stage: the output of map_chain becomes "context", the original input
# is passed through as "question", and both fill final_prompt for the model.
chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm
chain.invoke("Describe Victory Mansions.")

AIMessage(content='Victory Mansions is a dilapidated apartment building in George Orwell\'s novel "1984." It is depicted as a run-down and shabby building with cramped living spaces, poor conditions, and a lack of basic amenities like proper heating and plumbing. The building is under constant surveillance by the Party, with telescreens monitoring the residents\' every move. The hallway smells of boiled cabbage and old rag mats, and there is a large colored poster of an enormous face with the caption "BIG BROTHER IS WATCHING YOU." The building has a faulty lift, intermittent electricity supply, and the protagonist, Winston Smith, resides in a flat on the seventh floor furnished with a telescreen that cannot be completely shut off.', response_metadata={'token_usage': {'completion_tokens': 146, 'prompt_tokens': 535, 'total_tokens': 681}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None})