In [None]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_community.vectorstores import FAISS 
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage

In [5]:
# Load variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Fail fast on missing credentials, before any page is downloaded or embedded:
# ChatGroq is given GROQ_API_KEY explicitly, and the argument-less
# OpenAIEmbeddings() used to build the vector store reads OPENAI_API_KEY
# from the environment.
missing_keys = [
    name for name in ("GROQ_API_KEY", "OPENAI_API_KEY") if not os.environ.get(name)
]
if missing_keys:
    raise KeyError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )

groq_api_key = os.environ["GROQ_API_KEY"]

In [6]:
# Chat model shared by the question-rewriting step and the answering step.
# Groq has retired "llama-3.1-70b-versatile" (see Groq's deprecations page), so
# requests naming it are rejected; "llama-3.3-70b-versatile" is the recommended
# replacement.
llm = ChatGroq(
    api_key=groq_api_key,
    model="llama-3.3-70b-versatile",
)

In [23]:
# Web page that is loaded, chunked and indexed for retrieval.
# Alternative source that can be swapped in instead:
# url = "https://docs.giskard.ai/en/stable/open_source/testset_generation/rag_evaluation/index.html"
url = "https://en.wikipedia.org/wiki/2011_census_of_India"

In [24]:
# Fetch the page at `url` and load its content as documents.
loader = WebBaseLoader(url)
docs = loader.load()

# Break the page into chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings, index them in FAISS, and expose the
# index as a retriever for the chains built later.
embeddings = OpenAIEmbeddings()
vectors = FAISS.from_documents(documents, embeddings)
retriever = vectors.as_retriever()

In [25]:
# System instructions for the question-rewriting step: turn a follow-up question
# into a standalone one without answering it. Built one sentence per list item.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question which might"
        " reference context in the chat history, formulate a standalone"
        " question which can be understood without the chat history.",
        "Do NOT answer the question, just reformulate it if needed and"
        " otherwise return it as is.",
    ]
)

# Prompt for the question-rewriting step. Message order: the rewriting
# instructions, the prior conversation ("chat_history"), then the newest
# user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [26]:
# Combine the LLM, the base retriever and the question-rewriting prompt into a
# retriever that can take the chat history into account.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [27]:
# System instructions for the answering step. The instruction sentences are
# joined with single spaces; a blank line then separates them from the
# "{context}" placeholder that receives the retrieved text.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)

In [28]:
# Prompt for the answering step. Message order: the answering instructions
# (which carry the "{context}" slot), the prior conversation
# ("chat_history"), then the newest user input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [29]:
# Answering step: passes the retrieved documents to the LLM through `qa_prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full RAG pipeline: history-aware retrieval feeding the answering step.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)


In [30]:
# In-memory registry of chat histories, keyed by session id. It starts empty
# and is filled lazily by `get_session_history`.
store = dict()

In [31]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict. An unknown session id
    gets a new, empty ``ChatMessageHistory`` that is saved in ``store`` and
    returned; later calls with the same id return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh history for it.
        history = store[session_id] = ChatMessageHistory()
        return history

In [32]:
# Wrap the RAG chain with per-session message history. The key names tell the
# wrapper which input field holds the user message ("input"), which prompt
# variable receives the history ("chat_history"), and which output field
# holds the reply to record ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [35]:
# Replay the whole conversation in order so that a fresh kernel
# (Restart & Run All) rebuilds the same session history. The last question
# ("exact statistics") is a follow-up and needs the earlier turns as context.
session_config = {
    "configurable": {"session_id": "abc123"}
}  # constructs a key "abc123" in `store`.

conversation = [
    "What is this website about?",
    "Give me the summary about the same?",
    "Give me the exact statistics",
]
*earlier_questions, final_question = conversation

# Earlier turns are run only for their effect on the stored history.
for question in earlier_questions:
    conversational_rag_chain.invoke({"input": question}, config=session_config)

# Last expression of the cell: the answer to the final question is displayed.
conversational_rag_chain.invoke(
    {"input": final_question},
    config=session_config,
)["answer"]

'According to the 2011 census of India, the exact statistics are:\n\n- Total population: 1,210,854,977\n- Males: 623,724,568\n- Females: 586,469,294\n- Literacy rate: 74% (82.10% for males and 65.46% for females)\n- Density of population: 382 people per km²\n- Sex ratio: 943 females per 1000 males\n- Child sex ratio (0-6 age group): 919 females per 1000 males'

In [36]:
# Display the raw session store (session id -> chat history object).
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is this website about?', additional_kwargs={}, response_metadata={}), AIMessage(content='This website is about the 2011 census of India on Wikipedia, providing information on the 15th Indian census, including its history, population, demographics, and more.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Give me the summary about the same?', additional_kwargs={}, response_metadata={}), AIMessage(content="The 2011 census of India was the 15th Indian census, conducted to gather data on the country's population, demographics, and socioeconomic characteristics. The census provided information on population growth, literacy rates, sex ratios, and language demographics, among other things. It also served as a basis for the National Population Register and the Socio Economic and Caste Census.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Give me the exact statistics', additio

In [37]:
# Print the "abc123" session as a readable transcript, one message per
# paragraph.
for message in store["abc123"].messages:
    # Any message that is not an AIMessage is attributed to the user.
    prefix = "AI" if isinstance(message, AIMessage) else "User"
    print(f"{prefix}: {message.content}\n")

User: What is this website about?

AI: This website is about the 2011 census of India on Wikipedia, providing information on the 15th Indian census, including its history, population, demographics, and more.

User: Give me the summary about the same?

AI: The 2011 census of India was the 15th Indian census, conducted to gather data on the country's population, demographics, and socioeconomic characteristics. The census provided information on population growth, literacy rates, sex ratios, and language demographics, among other things. It also served as a basis for the National Population Register and the Socio Economic and Caste Census.

User: Give me the exact statistics

AI: According to the 2011 census of India, the exact statistics are:

- Total population: 1,210,854,977
- Males: 623,724,568
- Females: 586,469,294
- Literacy rate: 74% (82.10% for males and 65.46% for females)
- Density of population: 382 people per km²
- Sex ratio: 943 females per 1000 males
- Child sex ratio (0-6 