# **Conversational RAG PART 2:**

In [27]:
# Install the necessary libraries:
# NOTE(review): no versions are pinned and --upgrade is set, so every fresh run
# installs whatever is latest at that time; consider pinning for reproducibility.

# Sentence-Transformers (presumably the backend for the HuggingFace embeddings used below; confirm).
%pip install --upgrade --quiet sentence_transformers
# LangChain core/community/integration packages plus Chroma, bs4 and boto3.
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-google-genai langchain-chroma bs4 boto3
# LangChain AWS integration (not referenced by the cells visible in this part).
%pip install --upgrade --quiet langchain-aws

In [28]:
# Load the Tokens:

from google.colab import userdata
import os

# Read both credentials from the Colab secret store.
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
HF_TOKEN = userdata.get('HF_TOKEN')

# Export them as environment variables; the Gemini key is published under
# the name GOOGLE_API_KEY.
os.environ.update({'GOOGLE_API_KEY': GEMINI_API_KEY, 'HF_TOKEN': HF_TOKEN})

## **Load Model/LLM:**

In [29]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used by the chains built later in this notebook.
llm_settings = dict(
    model="gemini-1.5-pro",
    temperature=0.4,
    max_tokens=512,
    timeout=None,
    max_retries=2,
)
model = ChatGoogleGenerativeAI(**llm_settings)

# Quick smoke test: send a trivial prompt and print the reply text.
greeting = model.invoke("hi")
print(greeting.content)

Hi there! How can I help you today?



## **Load Embeddings:**

In [30]:
# Get the Embeddings:

# Import from langchain_community directly: `langchain.embeddings` only
# re-exports this class through a deprecated compatibility shim.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-Transformers checkpoint used to embed both documents and queries.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

## **Vector Store:**

In [None]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [31]:
# Load, chunk and index the contents of the blog.
BLOG_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"

# Restrict HTML parsing to the elements carrying these CSS classes.
content_filter = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(web_paths=(BLOG_URL,), bs_kwargs={"parse_only": content_filter})
docs = loader.load()

# Split the loaded page into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed every chunk and store the vectors in Chroma.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [32]:
# Retriever over the vector store; search_kwargs requests k=5 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [None]:
# Sanity-check retrieval. `invoke` is the Runnable entry point that replaces
# the deprecated `get_relevant_documents`.
retriever.invoke("What is LLM?")

## **Basic RAG Work Flow:**

In [34]:
from langchain_core.prompts import ChatPromptTemplate

In [35]:
# Define System Prompt:

# Adjacent string literals are joined with no separator, so every sentence
# fragment must end with its own punctuation and trailing space.
# `{context}` is the placeholder where the retrieved documents are inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Two-message template: the system instructions followed by the user's question.
prompt_messages = [("system", system_prompt), ("human", "{input}")]
chat_prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [37]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [38]:
# Question-answering chain: passes the documents it receives, via chat_prompt, to the model.
question_answering_chain = create_stuff_documents_chain(llm=model, prompt=chat_prompt)

# Retrieval chain: fetch documents with the retriever, then hand them to the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

In [39]:
# Ask a single question; the generated text is stored under the "answer" key.
mrkl_inputs = {"input": "what is MRKL?"}
response = rag_chain.invoke(mrkl_inputs)
response["answer"]

'MRKL (Modular Reasoning, Knowledge and Language) is a neuro-symbolic architecture for autonomous agents.  It uses a collection of expert modules, and a large language model (LLM) acts as a router to direct inquiries to the appropriate module.  These modules can be neural networks or symbolic systems like calculators or APIs.\n'

## **RAG-Based Chatbot With Memory (Chat History):**

In [42]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

### **Step 1: Create History-Aware-Retriever**

In [43]:
# Create History Aware Retriever:

# Instruction for the question-rewriting step. Adjacent string literals are
# joined with no separator, so each fragment carries its own trailing space.
retriever_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history, "
    "formulate a standalone question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
)


# Prompt for the rewriting step: instruction, prior turns, then the latest question.
contextualize_q_messages = [
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Retriever built from the model, the base retriever and the rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=model,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

### **Step 2: Define the Custom System Prompt and Create the Question-Answer Chain**

In [45]:
# Define the custom system prompt for the question-answer chain:

# Adjacent string literals are joined with no separator, so every sentence
# fragment must end with its own punctuation and trailing space.
# `{context}` is the placeholder where the retrieved documents are inserted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Answering prompt: system instructions, prior turns, then the new question.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Chain that passes the documents it receives, via qa_prompt, to the model.
question_answer_chain = create_stuff_documents_chain(llm=model, prompt=qa_prompt)

### **Step 3: Create Rag-Chain using history_aware_retriever and question_answer_chain**

In [46]:
# Conversational RAG chain: history-aware retrieval feeding the question-answer chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

### **Step 4: Generate Response and Manually add the Conversation (Human & AI) to chat_history:**

In [48]:
from langchain_core.messages import HumanMessage, AIMessage

# Initialize chat_history as an empty list; the generation cells extend it in place.
chat_history = list()

In [49]:
# Generation 01:
question = "what is Task Decomposition?"
turn_inputs = {"input": question, "chat_history": chat_history}
response = rag_chain.invoke(turn_inputs)
answer = response["answer"]

print("Question:\n", question)
print("Answer:\n", answer)

# Record this turn (question and response) in chat_history, extending the list in place.
chat_history += [HumanMessage(content=question), AIMessage(content=answer)]

Question:
 what is Task Decomposition?
Answer:
 Task decomposition is a technique used to break down complex tasks into smaller, more manageable steps.  Chain of thought (CoT) prompting encourages this decomposition by instructing the model to "think step by step".  This approach improves model performance and offers insights into the model's reasoning process.



In [50]:
# Display the conversation recorded so far.
chat_history

[HumanMessage(content='what is Task Decomposition?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Task decomposition is a technique used to break down complex tasks into smaller, more manageable steps.  Chain of thought (CoT) prompting encourages this decomposition by instructing the model to "think step by step".  This approach improves model performance and offers insights into the model\'s reasoning process.\n', additional_kwargs={}, response_metadata={})]

In [51]:
# Generation 02:
# The question is a follow-up ("it"), so it is sent together with chat_history.
question = "What are common ways of doing it?"
turn_inputs = {"input": question, "chat_history": chat_history}
response = rag_chain.invoke(turn_inputs)
answer = response["answer"]

print("Question:\n", question)
print("Answer:\n", answer)

# Record this turn (question and response) in chat_history, extending the list in place.
chat_history += [HumanMessage(content=question), AIMessage(content=answer)]

Question:
 What are common ways of doing it?
Answer:
 Task decomposition can be achieved by prompting LLMs with simple instructions to list steps or subgoals, using task-specific instructions (e.g., "Write a story outline"), or through direct human input.  Tree of Thoughts further extends this by exploring multiple reasoning possibilities at each step, creating a tree of potential solutions.



In [52]:
# Display the conversation recorded so far.
chat_history

[HumanMessage(content='what is Task Decomposition?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Task decomposition is a technique used to break down complex tasks into smaller, more manageable steps.  Chain of thought (CoT) prompting encourages this decomposition by instructing the model to "think step by step".  This approach improves model performance and offers insights into the model\'s reasoning process.\n', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What are common ways of doing it?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Task decomposition can be achieved by prompting LLMs with simple instructions to list steps or subgoals, using task-specific instructions (e.g., "Write a story outline"), or through direct human input.  Tree of Thoughts further extends this by exploring multiple reasoning possibilities at each step, creating a tree of potential solutions.\n', additional_kwargs={}, response_metadata={})]

 ### **Note:**

 Here we maintain **`chat_history`** manually: after every turn we append the **HumanMessage** and **AIMessage** ourselves and pass the list back into the chain.<br>

 <u>This is not the recommended approach; LangChain provides modules that manage the chat history automatically.</u>