# Stateful Conversational RAG
A conversational LLM app using LangChain & Chroma. Built from these tutorials: 
- [LangChain RAG](https://python.langchain.com/v0.2/docs/tutorials/rag/)
- [Conversational RAG](https://python.langchain.com/v0.2/docs/tutorials/qa_chat_history/)

This exercise builds on the `conversational-rag.ipynb` exercise and adds:
- Stateful management of chat history with LangChain Expression Language (LCEL)
- Sourcing multiple web-based documents

In [None]:
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-chroma beautifulsoup4
%pip install -qU langchain-openai
%pip install python-dotenv
%pip install langchain_core

In [2]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Pull settings (API keys etc.) from the local .env file into the process environment.
load_dotenv()

# Chat model shared by every chain below; the key is read from the environment.
llm = ChatOpenAI(model="gpt-4o-mini", api_key=os.environ.get("OPENAI_API_KEY"))

# Runtime configuration set in one place:
# - LANGCHAIN_TRACING_V2 / LANGCHAIN_PROJECT turn on LangSmith tracing under a
#   named project (LANGCHAIN_API_KEY is expected to come from .env).
# - USER_AGENT is set explicitly for this app.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "stateful-conversational-rag",
        "USER_AGENT": "conversational-rag-agent",
    }
)

## 1. Load, chunk and index source documents to create a retriever.

In [9]:
import bs4
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Source articles that make up the knowledge base.
source_urls = [
    "https://www.andyfitzgeraldconsulting.com/insights/what-is-information-architecture/",
    "https://www.andyfitzgeraldconsulting.com/insights/when-to-use-an-ia/",
    "https://www.andyfitzgeraldconsulting.com/insights/working-with-an-ia/",
    "https://www.andyfitzgeraldconsulting.com/insights/how-to-hire-an-ia/",
]

# Restrict HTML parsing to the <article> element of each page.
loader = WebBaseLoader(
    web_paths=source_urls,
    bs_kwargs={"parse_only": bs4.SoupStrainer("article")},
)
docs = loader.load()

# Chunk the documents (chunk_size=1000, chunk_overlap=200) before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed the chunks into a Chroma vector store and expose it as a
# similarity-search retriever configured with k=6.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

## 2. Add the retriever into a question-answering chain

In [4]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instructions for the answering model. `{context}` is the template
# slot that follows the instructions after a blank line.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use three sentences maximum and "
    "keep the answer concise.\n\n{context}"
)

# Chat prompt: the system instructions followed by the user's input.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine-documents chain built from the LLM and the prompt above.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain that wires the retriever into the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)


## 3. Add runnable with message history
This example uses LangChain's `RunnableWithMessageHistory` to manage chat histories, which are kept in a simple in-memory dict keyed by session id.

In [5]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Instructions for rewriting the latest user question into a standalone
# question, using the chat history only to resolve references.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)

# Prompt for the question-rewriting step: system instructions, then the
# messages supplied under "chat_history", then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever built from the LLM, the base retriever and the rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

# In-memory registry of chat histories, keyed by session id.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    A new, empty ``ChatMessageHistory`` is registered in ``store`` the first
    time a session id is seen; later calls return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history

# Build the history-aware variant of the RAG chain. The plain `rag_chain`
# retrieves with `retriever` directly and its prompt has no "chat_history"
# slot, so wrapping it would record the conversation without ever using it:
# follow-up questions would be retrieved and answered with no context.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_rag_chain = create_retrieval_chain(
    history_aware_retriever,
    create_stuff_documents_chain(llm, qa_prompt),
)

# Wrap the chain with message history. The history object comes from
# get_session_history (looked up by the "session_id" passed in the invoke
# config); it is supplied to the chain under "chat_history", the user message
# is read from "input", and the reply is read from "answer".
conversational_rag_chain = RunnableWithMessageHistory(
    history_aware_rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

## 4. Invoke the chain

In [None]:
import uuid

# Create a new id each time this cell is run, so every run of the cell
# starts with its own entry in the chat-history store.
session_id = str(uuid.uuid4())

# Minimal chat loop: type a question and press Enter; type "q" to quit.
while True:
    try:
        question = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # cleanly instead of surfacing a traceback.
        break
    if question == "q":
        break
    if not question:
        # Skip blank lines rather than invoking the chain with an empty question.
        continue
    response = conversational_rag_chain.invoke(
        {"input": question},
        config={"configurable": {"session_id": session_id}},
    )["answer"]
    print(response, end="\n\n")


In [None]:
# inspect chat history object
import json

def serialize_message_history(history):
    """Return the ``dict()`` form of each message in ``history.messages``, in order."""
    serialized = []
    for message in history.messages:
        serialized.append(message.dict())
    return serialized

# Map every session id in `store` to its serialized messages, then
# pretty-print the result as indented JSON.
pretty_store = {
    sid: serialize_message_history(chat_history)
    for sid, chat_history in store.items()
}
print(json.dumps(pretty_store, indent=4))