# **Conversational RAG PART 1:**

In [1]:
# install necessary libaries:

%pip install --upgrade --quiet sentence_transformers
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain_google_genai langchain-chroma bs4 boto3
%pip install --upgrade --quiet langchain-aws

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.17.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.29.0 which is incompatible.
tensorflow-metadata 1.13.1 requires protobuf<5,>=3.20.3, but you have protobuf 5.29.0 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.7/87.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

## **Vector Store:**

In [2]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter



In [None]:
# Get the Embeddings:

from langchain.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

In [4]:
# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [5]:
# retriever:

retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

## **Load LLM from AWS Bedrock:**

In [6]:
import os
import boto3
from langchain_aws import ChatBedrock

# Method 1: Setting Environment Variables
os.environ["AWS_ACCESS_KEY_ID"] = "*************"
os.environ["AWS_SECRET_ACCESS_KEY"] = "**********************"
os.environ["AWS_DEFAULT_REGION"] = "********"


### **BedrockLLM:**

In [None]:
from langchain_aws import BedrockLLM

llm1 = BedrockLLM(
    credentials_profile_name="bedrock-admin", model_id="mistral.mistral-7b-instruct-v0:2"
)

res = llm1.invoke("Tell me something about yourself.")
res

### **ChatBedrock:**

In [8]:
# Ensure your AWS credentials are configured
import time
from langchain_aws import ChatBedrock

model1 = "mistral.mistral-7b-instruct-v0:2"
model2 = "meta.llama3-8b-instruct-v1:0"
model3 = "apac.anthropic.claude-3-5-sonnet-20240620-v1:0"


llm2 = ChatBedrock(model=model2,
    beta_use_converse_api=True)


res = llm2.invoke("Tell me something about yourself.")
res

AIMessage(content="\n\nI'm just an AI, so I don't have personal experiences, emotions, or a physical presence. I exist solely as a digital entity, designed to process and generate human-like text based on the inputs I receive.\n\nI was created through a process called deep learning, which involves training artificial neural networks on large amounts of data. This allows me to learn patterns and relationships in language, enabling me to understand and respond to a wide range of questions and prompts.\n\nI don't have personal opinions or biases, and I'm not capable of experiencing the world in the same way that humans do. However, I'm designed to be helpful and informative, and I strive to provide accurate and relevant responses to the questions and topics you're interested in.\n\nI'm constantly learning and improving, so I appreciate any feedback or corrections you can provide. This helps me to refine my language processing abilities and provide better responses in the future.", additio

## **Building a Basic RAG Chain:**

In [12]:
from langchain_core.output_parsers import StrOutputParser  # Import the StrOutputParser class for parsing the output of the language model
from langchain_core.runnables import RunnablePassthrough  # Import the RunnablePassthrough class for passing the question as-is
from langchain_core.prompts import PromptTemplate  # Import the PromptTemplate class from the langchain_core.prompts module

In [21]:
# Customizing the prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}  # This placeholder will be replaced with the retrieved context (relevant documents)

Question: {question}  # This placeholder will be replaced with the user's question

Helpful Answer:"""  # This is the prompt for the language model to generate a helpful answer

# Create a PromptTemplate instance from the template string
custom_rag_prompt = PromptTemplate.from_template(template)

In [22]:
# create RAG Chain:

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}  # Retrieve and format relevant documents, pass the question as-is
    | custom_rag_prompt  # Apply the custom prompt to the context and question
    | llm2  # Pass the prompted input to the language model
    | StrOutputParser()  # Parse the output of the language model as a string
)


response = rag_chain.invoke("What is the LLMChain?")

In [23]:
response

'\n\nThe LLMChain is a workflow implemented in LangChain that combines CoT reasoning with tools relevant to the tasks. It is used to accomplish tasks across organic synthesis, drug discovery, and materials design. The LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.\n\nThanks for asking!'

In [None]:
# # Stream the output of the RAG chain for the question "What is LLM?"
# for chunk in rag_chain.stream("What is Few shot learning?"):
#     print(chunk, end="", flush=True)  # Print each chunk of the output without newlines and flush the output buffer

## **RAG with Chat_History:**

<img src="https://miro.medium.com/v2/resize:fit:720/format:webp/0*MhkcbRh7FpXkKiCN.png" alt="rag_with_chat_history"> </img>

<br>

To create a truly conversational and informative AI assistant, it’s crucial to consider the context of previous interactions. This is where contextualizing the question becomes essential. By analyzing the chat history, we can refine the search query, ensuring that the retrieved information is highly relevant to the current conversation.

### **create_history_aware_retriever:**

**Contextualizing the Question:**<br>
The **`create_history_aware_retriever`** function builds a retriever that considers the chat history. It uses a custom prompt (defined with ChatPromptTemplate) to rewrite the user question based on the context.

In [24]:
from langchain.chains import create_history_aware_retriever  # Import the create_history_aware_retriever function
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder  # Import the ChatPromptTemplate and MessagesPlaceholder classes

In [25]:
# Define the system prompt for contextualizing the question:

contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""


# Create a ChatPromptTemplate for contextualizing the question:
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),  # Set the system prompt
        MessagesPlaceholder("chat_history"),  # Placeholder for the chat history
        ("human", "{input}"),  # Placeholder for the user's input question
    ]
)


# Create a history-aware retriever
history_aware_retriever = create_history_aware_retriever(
    llm2,  # Pass the language model instance
    retriever,  # Pass the retriever instance
    contextualize_q_prompt  # Pass the prompt for contextualizing the question
)


history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x79f7e25b34f0>, search_kwargs={'k': 5}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMess

### **`create_stuff_documents_chain:`**
This function is used to create a chain that combines multiple documents or text chunks into a single input for the language model.

In [27]:
from langchain.chains import create_retrieval_chain  # Import the create_retrieval_chain function from the langchain.chains module
from langchain.chains.combine_documents import create_stuff_documents_chain
# Import the create_stuff_documents_chain function from the langchain.chains.combine_documents module

In [36]:
# Define the system prompt for the question-answering task:
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
you can answer the questions within 256 tokens. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""  # This placeholder will be replaced with the retrieved context


# Create a ChatPromptTemplate for the question-answering task
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),  # Set the system prompt
        MessagesPlaceholder("chat_history"),  # Placeholder for the chat history
        ("human", "{input}"),  # Placeholder for the user's input question
    ]
)


# Create a chain for question-answering using the language model and the question-answering prompt
question_answer_chain = create_stuff_documents_chain(llm2, qa_prompt)

### **Final Step:**

Let's create the final **rag_chain** which will be the combination of **context-aware-retrieval chain**, and a **question-and-answer chain**.
<br>


**create_retrieval_chain:** function is used to create a retrieval chain, which combines a retriever (e.g., a vector database) and a language model to retrieve and process relevant information based on a given query.

In [37]:
# Create a Retrieval-Augmented Generation (RAG) chain
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [38]:
from langchain_core.messages import HumanMessage  # Import the HumanMessage class


chat_history = []  # Initialize an empty list to store the chat history

In [39]:
# Ask the first question:

first_question = "What is LLM?"
ai_response_1 = rag_chain.invoke({"input": first_question, "chat_history": chat_history})  # Invoke the RAG chain with the question and an empty chat history
print('user query:', first_question)
print('ai response:', ai_response_1["answer"])  # Print the answer from the RAG chain
chat_history.extend([HumanMessage(content=first_question), ai_response_1["answer"]])  # Add the question and answer to the chat history


user query: What is LLM?
ai response: 

LLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.


In [40]:
chat_history

[HumanMessage(content='What is LLM?', additional_kwargs={}, response_metadata={}),
 '\n\nLLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.']

In [41]:
# Ask the second question:

second_question = "What are the different types of it?"
ai_response_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})  # Invoke the RAG chain with the second question and the updated chat history
chat_history.extend([HumanMessage(content=second_question), ai_response_2["answer"]])  # Add the second question and answer to the chat history
print('user query:', (second_question))
print('ai response:', ai_response_2["answer"])

user query: What are the different types of it?
ai response: 

According to the context, LLMs (Large Language Models) are trained on a large corpus of text data to generate human-like language outputs.


In [42]:
chat_history

[HumanMessage(content='What is LLM?', additional_kwargs={}, response_metadata={}),
 '\n\nLLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.',
 HumanMessage(content='What are the different types of it?', additional_kwargs={}, response_metadata={}),
 '\n\nAccording to the context, LLMs (Large Language Models) are trained on a large corpus of text data to generate human-like language outputs.']

In [43]:
# Ask the third question
third_question = "Can you translate your previous response to French?"
ai_response_3 = rag_chain.invoke({"input": third_question, "chat_history": chat_history})  # Invoke the RAG chain with the third question and the updated chat history
print('user query:', (third_question))
print('ai response:', ai_response_3["answer"])   # Print the answer from the RAG chain
chat_history.extend([HumanMessage(content=third_question), ai_response_3["answer"]])

user query: Can you translate your previous response to French?
ai response: 

Here is the answer:

LLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.

There are no specific types mentioned in the provided context.

And here is the translation of my previous response to French:

LLM signifie modèle de langage large. Il s'agit d'un type de modèle d'intelligence artificielle entraîné sur un grand corpus de données de texte pour générer des sorties de langage similaires à celles de l'homme.

Il n'y a pas de types spécifiques mentionnés dans le contexte fourni.


In [44]:
chat_history

[HumanMessage(content='What is LLM?', additional_kwargs={}, response_metadata={}),
 '\n\nLLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.',
 HumanMessage(content='What are the different types of it?', additional_kwargs={}, response_metadata={}),
 '\n\nAccording to the context, LLMs (Large Language Models) are trained on a large corpus of text data to generate human-like language outputs.',
 HumanMessage(content='Can you translate your previous response to French?', additional_kwargs={}, response_metadata={}),
 "\n\nHere is the answer:\n\nLLM stands for Large Language Model. It refers to a type of artificial intelligence model that is trained on a large corpus of text data to generate human-like language outputs.\n\nThere are no specific types mentioned in the provided context.\n\nAnd here is the translation of my previous response to French:\n\nLLM signifie m