# QUICKSTART ([source](https://python.langchain.com/docs/use_cases/question_answering/quickstart))

LOAD THE DOCUMENTS

In [None]:
!pip3 install pypdf

In [5]:
from os import getenv
from dotenv import load_dotenv

# Load variables from the local ".env" file into the environment; the call
# returns True when it set at least one variable.
load_dotenv(".env")

True

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the story PDF from the docs/ folder.
loader = PyPDFLoader("docs/caperucitaroja.pdf")
# `docs` is the variable the text-splitting cell further down consumes.
docs = loader.load()
# NOTE(review): `pages` is only used for the count displayed on the next line,
# and producing it goes through the loader a second time — confirm it is needed.
pages = loader.load_and_split()
len(pages)

SPLIT DOCUMENT INTO CHUNKS FOR EMBEDDING AND VECTOR STORAGE

"In this case we’ll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it. We use the RecursiveCharacterTextSplitter, which will recursively split the document using common separators like new lines until each chunk is the appropriate size. This is the recommended text splitter for generic text use cases." [source](https://python.langchain.com/docs/use_cases/question_answering/quickstart#indexing-split)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters: 1000-character chunks with 200 characters of overlap.
splitter_settings = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    # "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split the loaded documents and show how many chunks were produced.
all_splits = text_splitter.split_documents(docs)
len(all_splits)

STORE

"We need to index our chunks so we can search over them at runtime. The most common way to do this is to embed the contents of each document split and insert these embeddings into a vector database (or vector store). When we want to search over our splits, we take a text search query, embed it, and perform some sort of “similarity” search to identify the stored splits with the most similar embeddings to our query embedding. The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are high dimensional vectors)." [source](https://python.langchain.com/docs/use_cases/question_answering/quickstart#indexing-store)

In [None]:
from langchain_community.vectorstores import (
    Chroma,
)  # Options: https://python.langchain.com/docs/integrations/vectorstores

from langchain_community.embeddings import (
    GPT4AllEmbeddings,
)

# Embed every chunk with GPT4All embeddings and index the result in Chroma.
embedding_model = GPT4AllEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [None]:
# Similarity search that returns the top k documents.
search_settings = {"k": 6}
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=search_settings,
)

# Try the retriever on a sample question.
retrieved_docs = retriever.invoke("Como se llama la protagonista?")

In [None]:
# Number of documents the retriever returned for the sample question.
len(retrieved_docs)

In [None]:
# Show the text of the first retrieved document.
print(retrieved_docs[0].page_content)

GENERATE

"Let’s put it all together into a chain that takes a question, retrieves relevant documents, constructs a prompt, passes that to a model, and parses the output." [source](https://python.langchain.com/docs/use_cases/question_answering/quickstart#retrieval-and-generation-generate)

In [None]:
from langchain_community.llms import GPT4All

# Model options:
# https://python.langchain.com/docs/integrations/chat/

# GPT4All LLM backed by a GGUF model file under models/ (download link inline).
llm = GPT4All(
    model="models/mistral-7b-openorca.gguf2.Q4_0.gguf",  # https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf
    max_tokens=2048,
    temp=0.5,
    n_threads=8,
)

Original prompt ([source](https://python.langchain.com/docs/use_cases/question_answering/quickstart#retrieval-and-generation-generate))

In [None]:
# from langchain import hub

# prompt = hub.pull("rlm/rag-prompt")

New prompt ([source](https://python.langchain.com/docs/use_cases/question_answering/quickstart#retrieval-and-generation-generate))

In [None]:
from langchain.prompts import PromptTemplate

# Spanish question-answering prompt with two placeholders: {question} and {context}.
# NOTE(review): the template text is indented inside the triple-quoted string,
# so that indentation is part of the rendered prompt.
prompt_template = PromptTemplate.from_template(
    """
    Eres un asistente para responder preguntas. 
    Utiliza los siguientes fragmentos de contexto recuperado para responder la pregunta. 
    Si no conoces la respuesta, simplemente di que no la sabes. 
    Usa máximo tres oraciones y mantén la respuesta concisa.
    Pregunta: {question}
    Contexto: {context}
    Respuesta:
    """
)
# Render the template with sample values to check both placeholders.
prompt_template.format(question="pregunta ejemplo", context="contexto ejemplo")

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)


# Prompt inputs built from the raw question string: the question is passed
# through unchanged, and the context is the retrieved documents formatted as text.
prompt_inputs = {
    "question": RunnablePassthrough(),
    "context": retriever | format_docs,
}

# question -> prompt -> LLM -> plain string
rag_chain = prompt_inputs | prompt_template | llm | StrOutputParser()

In [None]:
# Run the whole chain once and display the complete answer.
rag_chain.invoke("A quien iba a visitar caperucita roja?")

In [None]:
# Same question, but print the answer piece by piece as it is streamed.
answer_stream = rag_chain.stream("A quien iba a visitar caperucita roja?")
for chunk in answer_stream:
    print(chunk, end="", flush=True)

In [None]:
# Ask a question whose answer lists several items from the story.
rag_chain.invoke(
    "Que preguntas le hizo caperucita roja al lobo disfrazado de su abuela?"
)

In [None]:
# Streamed variant of the previous question.
answer_stream = rag_chain.stream(
    "Que preguntas le hizo caperucita roja al lobo disfrazado de su abuela?"
)
for chunk in answer_stream:
    print(chunk, end="", flush=True)

In [None]:
# Yes/no style question about the story.
rag_chain.invoke(
    "El lobo se comio a la abuela de caperucita roja?"
)

In [None]:
# Open question; the same question is reused in the sources and chat-history sections.
rag_chain.invoke(
    "Que pasa con el lobo en esta version del cuento?"
)

# RETURNING SOURCES ([source](https://python.langchain.com/docs/use_cases/question_answering/sources))

ADDING SOURCES

With LCEL it's easy to return the retrieved documents:

In [None]:
from langchain_core.runnables import RunnableParallel


def _context_to_text(inputs):
    """Format the documents stored under "context" into a single string."""
    return format_docs(inputs["context"])


# Answer-generation sub-chain: receives "question" and "context" and replaces
# the context documents with their formatted text before prompting the LLM.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=_context_to_text)
    | prompt_template
    | llm
    | StrOutputParser()
)

# Retrieve first, then add the generated text under "answer", so the result
# carries the question, the retrieved documents and the answer.
rag_chain_with_source = RunnableParallel(
    question=RunnablePassthrough(),
    context=retriever,
).assign(answer=rag_chain_from_docs)

In [None]:
# The result is a dict with "question", "context" (retrieved documents) and "answer".
rag_chain_with_source.invoke(
    "Que pasa con el lobo en esta version del cuento?"
)

# ADD CHAT HISTORY ([source](https://python.langchain.com/docs/use_cases/question_answering/chat_history))

Allows the user to have a back-and-forth conversation.

"In this guide we focus on adding logic for incorporating historical messages, and NOT on chat history management. Chat history management is [covered here](https://python.langchain.com/docs/expression_language/how_to/message_history)."

We need to update two things about our existing app:
- **Prompt:** Add support for historical messages as an input.
- **Contextualizing questions:** Add a sub-chain that takes the latest user question and reformulates it in the context of the chat history. This is needed in case the latest question references some context from past messages. For example, if a user asks a follow-up question like “Can you elaborate on the second point?”, this cannot be understood without the context of the previous message. Therefore we can’t effectively perform retrieval with a question like this.

## Contextualizing the question ([source](https://python.langchain.com/docs/use_cases/question_answering/chat_history#contextualizing-the-question))

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction (Spanish): rewrite the latest user question as a standalone
# question using the chat history, without answering it.
contextualize_q_system_prompt = """Se proporciona un historial de chat y la última pregunta \
    del usuario, la cual podría referirse al contexto del historial. Formula una pregunta independiente \
    que se pueda entender sin el historial. NO respondas la pregunta, solo reformúlala si es necesario, \
    de lo contrario, devuélvela tal cual.
"""

# Message layout: system instruction, prior turns, then the new question.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# prompt -> LLM -> plain string (the reformulated question)
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

Using this chain we can ask follow-up questions that reference past messages and have them reformulated into standalone questions:

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# Feed a hand-written two-turn history plus a follow-up question that only
# makes sense in light of the previous answer.
contextualize_q_chain.invoke(
    {
        "chat_history": [
            HumanMessage(content="Que pasa con el lobo en esta version del cuento?"),
            AIMessage(content="En esta versión del cuento, el lobo intenta engañar y comer a Caperucita Roja pero es desmantelado por la niña y su abuela. El lobo se mete en la casa de la abuela para comérsela, pero al final es derrotado y no vuelve a aparecer."),
        ],
        "question": "A que te refieres con desmantelado?",
    }
)

## Chain with chat history ([source](https://python.langchain.com/docs/use_cases/question_answering/chat_history#chain-with-chat-history))

In [None]:
# System instruction (Spanish) for answering; the retrieved context is
# substituted at {context}.
qa_system_prompt = """Eres un asistente para responder preguntas. \
Utiliza la siguiente información del contexto para responder la pregunta. \
Si no sabes la respuesta, simplemente di que no la sabes. \
Intenta mantener la respuesta concisa y usar máximo tres oraciones. \
{context}
"""

# Message layout: system instruction with context, prior turns, new question.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

def contextualized_question(input: dict):
    """Choose what is sent to the retriever.

    Returns the question-reformulation chain when ``input`` carries a non-empty
    "chat_history"; otherwise returns the raw "question" value.
    """
    has_history = bool(input.get("chat_history"))
    return contextualize_q_chain if has_history else input["question"]

# Retrieval branch: (possibly reformulated) question -> retriever -> formatted text.
context_pipeline = contextualized_question | retriever | format_docs

# Add "context" to the input dict, then prompt the LLM with history and question.
rag_chain = RunnablePassthrough.assign(context=context_pipeline) | qa_prompt | llm

In [None]:
# Start with an empty history, so the question goes to the retriever as-is.
chat_history = []

question = "Que pasa con el lobo en esta version del cuento?"
# NOTE(review): rag_chain ends in `llm` with no output parser; with the GPT4All
# LLM defined earlier the result is presumably a plain string — confirm.
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
ai_msg

In [None]:
# `rag_chain` ends in the GPT4All LLM, which yields plain text rather than a
# message object. Wrap the text in an AIMessage so the history records it as the
# assistant's turn; a bare string placed in the history is not treated as an AI
# message. If the model is swapped for a chat model, `ai_msg` is already a
# message and is kept as-is.
answer_message = AIMessage(content=ai_msg) if isinstance(ai_msg, str) else ai_msg
chat_history.extend([HumanMessage(content=question), answer_message])

# The follow-up only makes sense together with the previous turn.
second_question = "Como derrotaron al lobo?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})

In [None]:
# Stream the chain that also returns sources, printing each key as it arrives
# and collecting the full result in `output`.
output = {}
curr_key = None
for chunk in rag_chain_with_source.stream("Quien es el villano de la historia?"):
    for key, piece in chunk.items():
        # Accumulate the pieces streamed for each key.
        if key in output:
            output[key] += piece
        else:
            output[key] = piece
        # Start a new labelled section whenever the streamed key changes.
        text = piece if key == curr_key else f"\n\n{key}: {piece}"
        print(text, end="", flush=True)
        curr_key = key
output

# STREAMING

In [None]:
from operator import itemgetter

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tracers.log_stream import LogStreamCallbackHandler

# System instruction (Spanish): rewrite the latest user question as a standalone
# question using the chat history, without answering it.
contextualize_q_system_prompt = """Dado un historial de chat y la última pregunta del usuario \
que podría hacer referencia al contexto en el historial de chat, formula una pregunta independiente \
que se pueda entender sin el historial de chat. NO respondas la pregunta, simplemente reformúlala \
si es necesario y, de lo contrario, devuélvela tal cual."""

# Message layout: system instruction, prior turns, then the new question.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Tag the sub-chain so its events can be selected by tag when streaming logs.
untagged_contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
contextualize_q_chain = untagged_contextualize_q_chain.with_config(
    tags=["contextualize_q_chain"]
)

# System instruction (Spanish) for answering; the retrieved context is
# substituted at {context}.
qa_system_prompt = """Eres un asistente para tareas de preguntas y respuestas. \
Utiliza la siguiente información del contexto recuperado para responder la pregunta. \
Si no sabes la respuesta, simplemente di que no lo sabes. \
Utiliza un máximo de tres oraciones y mantén la respuesta concisa.\

{context}"""

# Message layout: system instruction with context, prior turns, new question.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)


def contextualized_question(input: dict):
    """Return the raw question when there is no history, else the reformulation chain.

    ``input`` is the chain's input dict; a missing or empty "chat_history"
    counts as "no history".
    """
    if not input.get("chat_history"):
        return input["question"]
    return contextualize_q_chain


# Route the retrieval input through `contextualized_question`, so the LLM
# reformulation step only runs when there is chat history; with an empty history
# the question goes to the retriever unchanged. The tagged
# `contextualize_q_chain` is still what runs for follow-up questions.
rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)

In [None]:
# Needed for running async functions in a Jupyter notebook: apply the
# nest_asyncio patch once before any async cell is executed.
import nest_asyncio

nest_asyncio.apply()

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# First turn: empty history.
chat_history = []

question = "Que pasa con el lobo en esta version del cuento?"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

# `rag_chain` ends in the GPT4All LLM, which yields plain text rather than a
# message object. Wrap the text in an AIMessage so the history records it as the
# assistant's turn; a bare string placed in the history is not treated as an AI
# message. A message returned by a chat model is kept as-is.
answer_message = AIMessage(content=ai_msg) if isinstance(ai_msg, str) else ai_msg
chat_history.extend([HumanMessage(content=question), answer_message])

# Follow-up question used by the streaming examples below.
second_question = "Como derrotaron al lobo?"

In [None]:
# ct = 0
# async for jsonpatch_op in rag_chain.astream_log(
#     {"question": second_question, "chat_history": chat_history},
#     include_names=["Retriever"],
#     with_streamed_output_list=False,
# ):
#     print(jsonpatch_op)
#     print("\n" + "-" * 30 + "\n")
#     ct += 1
#     if ct > 20:
#         break

In [None]:
# ct = 0
# async for jsonpatch_op in rag_chain.astream_log(
#     {"question": second_question, "chat_history": chat_history},
#     include_tags=["contextualize_q_chain"],
# ):
#     print(jsonpatch_op)
#     print("\n" + "-" * 30 + "\n")
#     ct += 1
#     if ct > 20:
#         break

# Per-User Retrieval

## Step 1: Make sure the retriever you are using supports multiple users

Each vectorstore and retriever may have its own mechanism for separating users, and it may be called different things (namespaces, multi-tenancy, etc.). For vectorstores, this is generally exposed as a keyword argument that is passed in during `similarity_search`. By reading the documentation or source code, figure out whether the retriever you are using supports multiple users and, if so, how to use it.

## Step 2: Add that parameter as a configurable field for the chain

## Step 3: Call the chain with that configurable field

# PINECONE EXAMPLE

In [1]:
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_pinecone import PineconeVectorStore

In [3]:
from os import getenv
from dotenv import load_dotenv

# Load variables from the local ".env" file; PINECONE_API_KEY is read with
# getenv() when the vector store is created.
load_dotenv(".env")

True

In [7]:
# Embedding model used for both indexing and querying.
embeddings = GPT4AllEmbeddings()
# pinecone dimensions: 384
# NOTE(review): presumably the "test-example" index must already exist with
# that dimension — confirm against the Pinecone project.
vectorstore = PineconeVectorStore(
    index_name="test-example",
    embedding=embeddings,
    pinecone_api_key=getenv("PINECONE_API_KEY")
)

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [8]:
# Store one text per user, each in its own namespace.
# NOTE(review): re-running this cell presumably inserts the texts again under
# new ids — confirm before executing it more than once.
vectorstore.add_texts(["i worked at kensho"], namespace="harrison")
vectorstore.add_texts(["i worked at facebook"], namespace="ankush")

['a24e3224-20a0-4a3f-82f8-a0e68588f538']

In [9]:
# This will only get documents for Ankush.
# `invoke` is the standard runnable entry point (already used for the Chroma
# retriever earlier in this notebook) and replaces the deprecated
# `get_relevant_documents`.
vectorstore.as_retriever(search_kwargs={"namespace": "ankush"}).invoke(
    "where did i work?"
)

[Document(page_content='i worked at facebook')]

In [10]:
# This will only get documents for Harrison.
# `invoke` is the standard runnable entry point (already used for the Chroma
# retriever earlier in this notebook) and replaces the deprecated
# `get_relevant_documents`.
vectorstore.as_retriever(
    search_kwargs={"namespace": "harrison"}
).invoke("where did i work?")

[Document(page_content='i worked at kensho')]

In [11]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import (
    ConfigurableField,
    RunnableBinding,
    RunnableLambda,
    RunnablePassthrough,
)
from langchain.llms import GPT4All

In [12]:
# Prompt that restricts the answer to the retrieved context.
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Same GPT4All model file and settings as the `llm` created in the quickstart section.
model = GPT4All(
    model="models/mistral-7b-openorca.gguf2.Q4_0.gguf",
    max_tokens=2048,
    temp=0.5,
    n_threads=8,
)

# NOTE(review): this rebinds the notebook-level name `retriever` (previously the
# Chroma retriever) to the Pinecone one — confirm earlier cells are not re-run after this.
retriever = vectorstore.as_retriever()

In [13]:
# Expose the retriever's `search_kwargs` as a field that can be set per call
# through the "configurable" section of the run config.
search_kwargs_field = ConfigurableField(
    id="search_kwargs",
    name="Search Kwargs",
    description="The search kwargs to use",
)
configurable_retriever = retriever.configurable_fields(
    search_kwargs=search_kwargs_field
)

In [14]:
# Prompt inputs: context from the configurable retriever, question passed through.
chain_inputs = {
    "context": configurable_retriever,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> model -> plain string
chain = chain_inputs | prompt | model | StrOutputParser()

In [15]:
# Restrict retrieval to the "harrison" namespace for this call only.
chain.invoke(
    "where did the user work?",
    config={"configurable": {"search_kwargs": {"namespace": "harrison"}}},
)

'Answer: The user worked at Kensho.'

In [16]:
# Same question, now restricted to the "ankush" namespace.
chain.invoke(
    "where did the user work?",
    config={"configurable": {"search_kwargs": {"namespace": "ankush"}}},
)

'Answer: The user worked at Facebook.'

# CITATION

In [18]:
!pip install -qU langchain langchain-anthropic langchain-community