# CodingMindset - RAG App - LangChain

!%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma pypdf

import getpass
import os

# Ask for the OpenAI API key without echoing it and store it in the process
# environment; the OpenAI embedding and chat clients further down are created
# without an explicit key, so they are expected to pick it up from here.
os.environ["OPENAI_API_KEY"] = getpass.getpass()

In [20]:
from langchain_community.document_loaders import PyPDFDirectoryLoader


# Build a loader over the PDF directory, then read its contents into
# `documents`, which the splitting step below consumes.
pdf_loader = PyPDFDirectoryLoader("./GarajeDeIdeas/2.RAG/data/")
documents = pdf_loader.load()

In [21]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings: chunks of at most 1000 units with a 200-unit overlap
# between neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into the chunks that get embedded and indexed.
splits = text_splitter.split_documents(documents)


In [22]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed every chunk with the OpenAI embedding model and index it in Chroma.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Retriever view over the vector store; both chains below query through it.
retriever = vectorstore.as_retriever()


In [23]:
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the QA assistant. The trailing "{context}"
# placeholder is a template variable that is filled in when the prompt is
# rendered.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions above, followed by the
# user's question supplied through the "{input}" variable.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [24]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI

# Chat model used to generate answers (also reused by the LCEL chain below).
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Inner chain: combines the model with the chat prompt defined above.
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)

# Outer chain: pairs the retriever with the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [25]:
# The retrieval chain is invoked with the user question under the "input" key.
chain_input = {"input": "What is mamba?"}
result = rag_chain.invoke(chain_input)



In [26]:
# Display the full response returned by the chain; as the output below shows,
# it includes the original 'input' and the retrieved 'context' documents.
result

{'input': 'What is mamba?',
 'context': [Document(page_content='genomics, audio, and video. Our results suggest that Mamba is a strong candidate to be a general sequence model\nbackbone.\nAcknowledgments\nWe thank Karan Goel, Arjun Desai, and Kush Bhatia for helpful feedback on the draft.\nReferences\n[1] Martin Arjovsky, Amar Shah, and Yoshua Bengio. “Unitary Evolution Recurrent Neural Networks”. In: The\nInternational Conference on Machine Learning (ICML) . 2016, pp. 1120–1128.\n[2] iga Avsec, Vikram Agarwal, Daniel Visentin, Joseph R Ledsam, Agnieszka Grabska-Barwinska, Kyle R Taylor,\nYannis Assael, John Jumper, Pushmeet Kohli, and David R Kelley. “Eﬀective Gene Expression Prediction from\nSequence by Integrating Long-range Interactions”. In: Nature Methods 18.10 (2021), pp. 1196–1203.\n[3] Jimmy Ba, Geoﬀrey E Hinton, Volodymyr Mnih, Joel Z Leibo, and Catalin Ionescu. “Using Fast Weights to\nAttend to the Recent Past”. In: Advances in Neural Information Processing Systems (NeurIPS)

## RAG with LCEL

1. Creamos una función de ayuda: una vez recuperados los documentos, los unimos en un único texto para añadirlos como contexto al prompt.

In [11]:
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values, in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

In [29]:
from langchain import hub

# Fetch the shared RAG prompt from the hub. NOTE: this rebinds `prompt`,
# replacing the chat template defined earlier in the notebook.
hub_prompt_id = "rlm/rag-prompt"
prompt = hub.pull(hub_prompt_id)

In [31]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# First stage: build the prompt variables from the raw question.
#   "context"  -> retrieved documents joined into one string by format_docs
#   "question" -> the incoming question, passed through unchanged
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: prompt variables -> prompt -> chat model -> plain string.
lcel_chain = prompt_inputs | prompt | llm | StrOutputParser()

In [32]:
# The LCEL chain takes the bare question string as its input.
question = "What is mamba?"
lcel_chain.invoke(question)



'Mamba is a strong candidate for a general sequence model backbone that out-performs prior state-of-the-art models in audio and genomics tasks, showing superior performance with longer context up to million-length sequences. It is the first linear-time sequence model to achieve Transformer-quality performance in language modeling, exceeding the performance of other baselines even with 1B parameters.'