In [8]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda


llm = ChatOpenAI(
    temperature=0.1
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/allotment.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()


map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    documents = inputs['documents']
    question = inputs['question']
    return "\n\n".join(
        map_doc_chain.invoke(
            {
                "context" : doc.page_content, "question" : question
            }
        ).content
        for doc in documents
    )

    # results = []
    # for document in documents:
    #     result = map_doc_chain.invoke({
    #         "context": document.page_content,
    #         "question": question
    #     }).content
    #     results.append(result)
    # return results

map_chain = { "documents": retriever, "question" : RunnablePassthrough() } | RunnableLambda(map_docs)
# {
#     "documents" : [Documents],
#     "question" : "What is the allotment meaning?"
# }

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", 
         """
         Given the following extracted parts of a long document and a question, create a final answer.
         If you don't know the answer, just say that you don't know. Don't try to make up an answer.
         ----
         {context}
         """,
        ),
        ("human", "{question}"),
    ]
)

chain = {"context": map_chain ,"question" : RunnablePassthrough() } | final_prompt | llm

chain.invoke("What is the allotment meaning?")



Created a chunk of size 1384, which is longer than the specified 600
Created a chunk of size 1450, which is longer than the specified 600
Created a chunk of size 1485, which is longer than the specified 600
Created a chunk of size 1364, which is longer than the specified 600
Created a chunk of size 1243, which is longer than the specified 600
Created a chunk of size 1317, which is longer than the specified 600
Created a chunk of size 1434, which is longer than the specified 600
Created a chunk of size 1367, which is longer than the specified 600
Created a chunk of size 1448, which is longer than the specified 600
Created a chunk of size 1391, which is longer than the specified 600
Created a chunk of size 1412, which is longer than the specified 600
Created a chunk of size 1406, which is longer than the specified 600
Created a chunk of size 1482, which is longer than the specified 600
Created a chunk of size 1322, which is longer than the specified 600
Created a chunk of size 1350, whic

AIMessage(content='The term "allotment" refers to a plot of land made available for individual, non-commercial gardening for growing food plants, forming a kitchen garden away from the residence of the user. These plots are typically subdivided from a larger piece of land into smaller parcels that are assigned to individuals or families for gardening purposes.')