In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# NOTE(review): an unfinished "from langchain." statement stood on this line.
# It named no module or symbol and was a syntax error, so it has been dropped.
from langchain.chains import RetrievalQA
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

from langchain.prompts import ChatPromptTemplate

# Chat model shared by the per-document step and the final answering step.
llm = ChatOpenAI(temperature=0.1)

# Local file store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk size and overlap are counted with the tiktoken encoder.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

# Load every .txt file that sits in a "files" directory under the parent folder.
loader = DirectoryLoader(path="../", glob="**/files/*.txt")

docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings so that results are cached in cache_dir.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Index the chunks in FAISS and expose the index as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Per-document prompt: asks the model to quote whatever part of {context}
# is relevant to {question}.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            ------
            {context}
            """,
    ),
    ("human", "{question}"),
])

# Feed the filled-in prompt to the chat model.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the per-document chain on each document and merge the replies.

    Args:
        inputs: Mapping with a 'documents' entry (iterable of objects that
            expose ``page_content``) and a 'question' entry.

    Returns:
        The ``content`` of every ``map_doc_chain`` reply, in document order,
        joined by blank lines. An empty string when there are no documents.
    """
    documents = inputs['documents']
    question = inputs['question']

    extracts = []
    for document in documents:
        reply = map_doc_chain.invoke(
            {"context": document.page_content, "question": question}
        )
        extracts.append(reply.content)
    return "\n\n".join(extracts)

# Map step: the incoming question is sent to the retriever to fetch documents
# and is also passed through unchanged; map_docs then receives both.
map_chain = (
    {
        "documents": retriever,
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(map_docs)
)

# Reduce-step prompt: answers {question} from the extracts placed in {context}.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
    ),
    ("human", "{question}"),
])


# Full pipeline: the map step supplies the context, the raw question is passed
# through, and the final prompt plus chat model produce the answer.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
    }
    | final_prompt
    | llm
)

# Left as a bare expression so the notebook displays the returned message.
chain.invoke("What is benefits of using LCEL")



AIMessage(content="The benefits of using LCEL (Language Chain Execution Library) include:\n\n1. Unified interface: LCEL objects implement the Runnable interface, allowing for common invocation methods such as invoke, batch, stream, ainvoke, and more.\n\n2. Composition primitives: LCEL provides primitives that make it easy to compose chains, parallelize components, add fallbacks, dynamically configure chain internals, and more.\n\n3. Simplified chain building: LCEL simplifies the process of building complex chains from basic components, enabling the creation of sophisticated workflows.\n\n4. Reusability: Chains of LCEL objects are themselves LCEL objects, making them reusable and modular.\n\n5. Increased functionality: LCEL enhances the functionality of existing components and allows for the creation of new functionality by combining and configuring different components.\n\n6. Streaming support: LCEL enables streaming tokens directly from a Language Model to a streaming output parser, p