In [27]:
import os
import pandas as pd
from dotenv import load_dotenv

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings

# from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_community.vectorstores import FAISS 

from giskard.rag import KnowledgeBase, generate_testset, evaluate



from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

In [3]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Indexing os.environ (rather than .get) raises KeyError here when the key is missing.
groq_api_key = os.environ["GROQ_API_KEY"]

In [4]:
# Chat model shared by the question-rewriting step and the answering step.
# NOTE(review): the previous id "llama-3.1-70b-versatile" is listed as shut down
# on Groq's deprecations page, with "llama-3.3-70b-versatile" named as the
# replacement -- confirm against the current Groq model list before merging.
llm = ChatGroq(
    api_key=groq_api_key,
    model="llama-3.3-70b-versatile",
)

In [14]:
# Page that is downloaded and indexed as the knowledge source for the RAG chain.
url = (
    "https://docs.giskard.ai/en/stable/open_source/"
    "testset_generation/rag_evaluation/index.html"
)

In [None]:
# Fetch the page at `url` and load it as LangChain documents.
loader = WebBaseLoader(url)
docs = loader.load()

# Split the loaded documents with chunk_size=1000 and chunk_overlap=200.
documents = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
).split_documents(docs)

# Embed the chunks with OpenAIEmbeddings, index them in FAISS, and expose the
# index through the retriever interface used by the chains below.
vectors = FAISS.from_documents(documents=documents, embedding=OpenAIEmbeddings())
retriever = vectors.as_retriever()

In [30]:
# System instruction for the question-rewriting step: turn a follow-up question
# into a standalone one without answering it. The fragments are joined with a
# single space, so the resulting text is one continuous paragraph.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

# Prompt for the question-rewriting step, in order: the system instruction,
# the prior turns (filled from the "chat_history" variable), and the latest
# user message (filled from "input").
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ],
)

In [31]:
# Combine the chat model, the base retriever, and the question-rewriting
# prompt into a single history-aware retriever.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [32]:
# System instruction for the answering step, one sentence per source line.
# The trailing "{context}" is a template variable left unformatted here.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

In [33]:
# Prompt for the answering step, in order: the system instruction (which
# carries the "{context}" slot), the prior turns from "chat_history", and
# the user's message from "input".
qa_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ],
)

In [34]:
# Answering step: built from the chat model and the QA prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full RAG chain: history-aware retrieval feeding the answering step.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)


In [35]:
# In-memory mapping of session_id -> chat history, filled lazily by
# get_session_history. Re-running this cell discards all stored histories.
store = dict()

In [36]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history kept in ``store`` for ``session_id``.

    If no entry exists yet, a new empty ``ChatMessageHistory`` is created,
    saved in ``store`` under ``session_id``, and returned.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh history for it.
        history = store[session_id] = ChatMessageHistory()
        return history

In [37]:
# Wrap the RAG chain with per-session message history. The three *_key
# arguments name the fields used for the user message ("input"), the prior
# turns ("chat_history"), and the generated reply ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    history_messages_key="chat_history",
    input_messages_key="input",
    output_messages_key="answer",
)

In [39]:
# Ask one question in session "abc123" and show only the generated answer.
# The session_id in the config constructs a key "abc123" in `store`.
conversational_rag_chain.invoke(
    {"input": "What is Giskard?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'The provided context does not explicitly define what Giskard is, but it appears to be a project or platform with a GitHub community, Discord community, and documentation on how to configure a local development environment and its architecture.'

In [40]:
# Show the session-history mapping; as the cell's last expression, its repr becomes the output.
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='The provided context does not mention "Task Decomposition" explicitly.', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is Giskard?', additional_kwargs={}, response_metadata={}), AIMessage(content='The provided context does not explicitly define what Giskard is, but it appears to be a project or platform with a GitHub community, Discord community, and documentation on how to configure a local development environment and its architecture.', additional_kwargs={}, response_metadata={})])}