# RAG chatbot

Load

In [27]:
from langchain_community.document_loaders import PyPDFLoader

file_path = "example_documents/deep_learning.pdf"

loader = PyPDFLoader(file_path)
pages = []
for page in loader.lazy_load():
    pages.append(page)

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 48 0 (offset 0)
Ignoring wrong pointing object 54 0 (offset 0)
Ignoring wrong pointing object 65 0 (offset 0)
Ignoring wrong pointing object 67 0 (offset 0)
Ignoring wrong pointing object 69 0 (offset 0)
Ignoring wrong pointing object 83 0 (offset 0)
Ignoring wrong pointing object 85 0 (offset 0)
Ignoring wrong pointing object 95 0 (offset 0)
Ignoring wrong pointing object 156 0 (offset 0)
Ignoring wrong pointing object 188 0 (offset 0)
Ignoring wrong pointing object 223 0 (offset 0)
Ignoring wrong pointing object 269 0 (offset 0)
Ignoring wrong pointing object 400 0 (offset 0)
Ignoring 

Split

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(pages)

Store

In [29]:
import os
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# the embeddings model wants the API explicitly for some reason
load_dotenv()
hf_key = os.getenv('HUGGINGFACEHUB_API_TOKEN')

embeddings_model = HuggingFaceInferenceAPIEmbeddings(
    api_key=hf_key, model_name="sentence-transformers/all-MiniLM-l6-v2"
)

from langchain_community.vectorstores import FAISS

vector_store = FAISS.from_documents(documents=all_splits, embedding=embeddings_model)

Retrieval

In [30]:
retriever = vector_store.as_retriever()

LLM

In [31]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    max_new_tokens=1000,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)

chat_model = ChatHuggingFace(llm=llm)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/danielsuarez-mash/.cache/huggingface/token
Login successful


# History Aware Retriever

Takes 3 things:
- Query
- Chat history
- Retriever

This chain takes the query and chat history and reformulates a new 'contextualised' query which is then used with the retriever to find relevant documents. These documents are then the output upon invokation. It can be thought of as a replacement for a standard retriever.

In [32]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
from langchain_core.output_parsers import StrOutputParser

history_aware_retriever = contextualize_q_prompt | chat_model.bind(max_tokens=1000, temperature=1) | StrOutputParser() | retriever

Example

In [33]:
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tracers import ConsoleCallbackHandler

chat_history = [
    HumanMessage(content=
                 "Hello, my name is Daniel and I work as a data scientist."
                 "I'd really enjoyed the methodology section of this project document. Especially the part on single-stage models."
                 ),
    AIMessage(content="Hello Daniel, nice to meet you. Sounds interesting."),
]

input = "Can you remind me of why that type of model was chosen for the project?"

documents = history_aware_retriever.invoke({
    "chat_history": chat_history,
    "input":"What observation was made about single-stage models?"
    },
    config={'callbacks': [ConsoleCallbackHandler()]})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] [2ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "System: Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.\nHuman: Hello, my name is Daniel and I work as a data scientist.I'd really enjoyed the methodology section of this project document. Especially the part on single-stage models.\nAI: Hello Daniel, nice to meet y

In [34]:
documents

[Document(metadata={'source': 'example_documents/deep_learning.pdf', 'page': 58, 'start_index': 564}, page_content='6 10/07/2020 - 19/07/2020 27/07/2020 – 02/08/2020 • Begin iterative process of improving performance of model • Record iterative improvements • Settle on final iteration • Critically analyse performance and computational requirements 7 20/07/2020 - 31/07/2020 31/07/2020 – 14/08/2020 • Produce first write-up draft 8 31/07/2020 - 14/08/2020 07/08/2020 – 14/08/2020 • Final opportunity for technical ideas or modifications to proposed model  • Consider feedback from supervisor to iteratively improve final project write-up  As shown above, the planned project schedule was adhered to for stage 1. This involved searching for and selecting a suitable dataset which contained good quality imagery and ground truth data. Almost all of the first few literature papers at least partly used this dataset to conduct'),
 Document(metadata={'source': 'example_documents/deep_learning.pdf', 'pa

# Question answer chain

This chain is designed to take 3 things:
- Context (documents provided by the retriever)
- Chat history
- User input

In [35]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
    "You are also given chat history below to help contextualise the question."
    "{chat_history}"
    "{input}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
    ("system", system_prompt)
    ]
)

qa_chain = qa_prompt | chat_model.bind(max_tokens=1000, temperature=1) | StrOutputParser()

## Example
Let's take the documents created in the previous step, the chat history and the user input to test this chain out.

In [36]:
qa_chain.invoke({"context": documents, "chat_history": chat_history, "input": input},
    config={'callbacks': [ConsoleCallbackHandler()]})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n[Document(metadata={'source': 'example_documents/deep_learning.pdf', 'page': 58, 'start_index': 564}, page_content='6 10/07/2020 - 19/07/2020 27/07/2020 – 02/08/2020 • Begin iterative process of improving performance of model • Record iterative i

'According to page 17 of the document, the decision to create two-stage models was made after analyzing the research presented in chapter 2, which showed that two-stage models outperformed single-stage models such as the Single Shot Detector (SSD).'

# Retrieval chain
All we need now is to connect these two chains together. What is tricky is that we need the chat history and input to go to two places: (1) the history aware retriever and (2) the question answer chain. This is where we can use the RunnablePassthrough function which, in this case, passes through our chat history and input unchanged and into the QA chain.

In [37]:
from langchain_core.runnables import RunnablePassthrough

retrieval_chain = (
    {"context": history_aware_retriever, "chat_history": RunnablePassthrough(), "input": RunnablePassthrough()}
    | qa_chain
)

In [38]:
output = retrieval_chain.invoke({"chat_history": chat_history, "input": input},
    config={'callbacks': [ConsoleCallbackHandler()]})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence > prompt:ChatPromptTemplate] [1ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequ

In [39]:
output

'The project chose two-stage models instead of single-stage models like SSD due to a lack of performance against two-stage models, as analyzed in chapter 2. This led to the decision to create the two-stage models presented in the project.'

## Update chat history

In [40]:
chat_history.extend([HumanMessage(content=input), AIMessage(content=output)])

In [41]:
chat_history

[HumanMessage(content="Hello, my name is Daniel and I work as a data scientist.I'd really enjoyed the methodology section of this project document. Especially the part on single-stage models.", additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hello Daniel, nice to meet you. Sounds interesting.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Can you remind me of why that type of model was chosen for the project?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='The project chose two-stage models instead of single-stage models like SSD due to a lack of performance against two-stage models, as analyzed in chapter 2. This led to the decision to create the two-stage models presented in the project.', additional_kwargs={}, response_metadata={})]

In [42]:
input2 = "And was anything said about the other type of model?"

output = retrieval_chain.invoke({"chat_history": chat_history, "input": input2},
    config={'callbacks': [ConsoleCallbackHandler()]})

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequence > prompt:ChatPromptTemplate] [0ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,chat_history,input> > chain:RunnableSequ