# Conversational RAG Application With LangChain and OpenAI LLM

In [2]:
# Initial the necessary packages
!pip install langchain -qU
!pip install langchain-openai -qU
!pip install langchain-chroma -qU
!pip install langchain_community -qU

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.4/63.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.4/438.4 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.0/19.0 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m23.8 MB/s[0m eta [36m0:00:0

In [3]:
import os
from google.colab import userdata

# Initialize OpenAI LLM

In [4]:
# Initialize the OpenAI LLM

In [5]:
from langchain_openai import ChatOpenAI

# Copy the OpenAI API key from the Colab secret store into the process
# environment; no key is passed to the model explicitly below.
os.environ.update(OPENAI_API_KEY=userdata.get("OPENAI_API_KEY"))

# Initialize the ChatOpenAI model (temperature=0 requests the least random sampling)
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Initialize Embedding Model

In [6]:
from langchain_openai import OpenAIEmbeddings
# OpenAI embedding model; it is handed to the vector store further down
# to embed the document chunks.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

# Load PDF Document

In [7]:
!pip install pypdf -qU

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/303.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.4/303.4 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Directory (on the Colab filesystem) that holds the PDF files to index.
# Change this single constant to point the notebook at another corpus.
PDF_DIR = "/content/Material Synthesis"

# Load the PDF documents found in the directory
loader = PyPDFDirectoryLoader(PDF_DIR)
docs = loader.load()

# Fail fast when nothing was loaded (e.g. the PDFs were not uploaded to this
# session or the path is wrong). Otherwise the problem only shows up later,
# as an obscure error or as answers produced without any context.
if not docs:
    raise FileNotFoundError(
        f"No PDF content was loaded from {PDF_DIR!r}; "
        "check that the directory exists and contains PDF files."
    )

In [9]:
# Number of Document objects returned by the loader
len(docs)

37

In [10]:
# Inspect the second loaded document (index 1): its metadata and page text
docs[1]

Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-21T06:33:37+05:30', 'author': 'Murthi S Kandanapitiye', 'moddate': '2025-03-21T06:33:37+05:30', 'source': '/content/Material Synthesis/Experiment 4- Synthesis of KIO3 and Clock reaction with KIO3.pdf', 'total_pages': 5, 'page': 1, 'page_label': '2'}, page_content='This experiment illustrates the relative ease of oxidation of two halides. A “clock reaction” with \npotassium iodate is illustrated.  \nIntroduction \nIodine can be oxidized to iodate by potassium chlorate . The chlorine in potassium chlorate is \nreduced to chlorine and the elemental iodine is oxidized to iodate. The examination of the reactants \nand the products would appear to indicate that iodine has been substituted for chlorine in potassium \nchlorate. Because iodine, with electrons further from nucleus than chlorine, has a smaller electron \naffinity than chlorine, the redox rea

# Split Documents into Chunks

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Recursive splitter configured for chunks of size 400 with an overlap of 50
# between neighbouring chunks (sizes are measured by the splitter's default
# length function).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=400,
)

# Break every loaded document into retrieval-sized chunks
splits = text_splitter.split_documents(documents=docs)

In [12]:
# Number of chunks produced by the text splitter
len(splits)

210

# Create Vector Store and Retriever


In [13]:
from langchain_chroma import Chroma

# Embed every chunk with the embedding model and index the result in Chroma.
# No persist directory is supplied here.
vectorstore = Chroma.from_documents(
    embedding=embedding_model,
    documents=splits,
)

In [14]:

# Wrap the vector store in a retriever so the chains built below can query it.
# No arguments are passed, so the library's default search settings apply.
retriever = vectorstore.as_retriever()

# Define Prompt Template


In [15]:
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering step. The "{context}" placeholder is
# the slot where the retrieved document text is inserted.
system_prompt = (
    "You are an intelligent chatbot. "
    "Use the following context to answer the question. "
    "If you don't know the answer, just say that you don't know."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, then the user's question
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])


In [16]:
# Display the assembled template to check its messages and input variables
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

# Create Retrieval-Augmented Generation (RAG) Chain


In [17]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Question-answering step: passes the documents it receives to the LLM
# through `prompt`
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve with `retriever`, then answer with `qa_chain`
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=qa_chain,
)

# Invoke RAG Chain with Example Questions


In [18]:

# Ask one stand-alone question through the RAG chain
question = "what are the importances of SDS-PAGE"
response = rag_chain.invoke({"input": question})

# Show only the generated answer from the response
response["answer"]


'SDS-PAGE (Sodium Dodecyl Sulfate Polyacrylamide Gel Electrophoresis) is an important technique in biochemistry and molecular biology for separating proteins based on their molecular weight. Some of the key importances of SDS-PAGE are:\n\n1. **Protein Separation**: SDS-PAGE allows for the separation of proteins based on their molecular weight. The proteins are denatured and coated with SDS, which gives them a negative charge and allows for separation solely based on size.\n\n2. **Quantification**: It can be used to estimate the molecular weight of unknown proteins by comparing them to known molecular weight markers run on the gel.\n\n3. **Purity Analysis**: SDS-PAGE can be used to assess the purity of protein samples by showing the presence of multiple bands (indicating impurities) or a single band (indicating purity).\n\n4. **Western Blotting**: SDS-PAGE is often used as the first step in Western blot analysis, where separated proteins are transferred to a membrane for specific protei

# Add Chat History


In [19]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Instruction for the question-rewriting step that runs before retrieval
contextualize_system_prompt = (
    "using chat history and the latest user question, "
    "just reformulate question if needed and otherwise return it as is"
)

# Template for that step: instruction, prior conversation turns, newest question
contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever that is given the LLM and the contextualize prompt in addition to
# the plain retriever
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_prompt,
)


# Create History-Aware RAG Chain


In [20]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Same answering instructions as the earlier prompt cell; "{context}" is the
# slot for the retrieved document text.
system_prompt = (
    "You are an intelligent chatbot. "
    "Use the following context to answer the question. "
    "If you don't know the answer, just say that you don't know."
    "\n\n{context}"
)

# NOTE: this rebinds the name `prompt`. From here on the template also expects
# a `chat_history` list of messages between the system message and the question.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Display the template to confirm `chat_history` is now an input variable
prompt


ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [21]:
# Rebuild the question-answering step around the current `prompt`
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Rebind `rag_chain`: retrieval now goes through the history-aware retriever
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=qa_chain,
)

# Manage Chat Session History


In [22]:

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session ID.
# NOTE: re-running this cell rebinds `store` to a new empty dict, so any
# histories accumulated so far are no longer reachable through it.
store = {}

# Look up (or lazily create) the chat history that belongs to a session
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session_id``.

    The first request for an unknown session ID stores a fresh, empty
    ``ChatMessageHistory`` in the module-level ``store`` and returns it;
    later requests with the same ID return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register an empty history for it.
        history = store[session_id] = ChatMessageHistory()
        return history

# Wrap the RAG chain so each call uses the chat history of its session.
# The three keys name the fields used by the wrapped chain: "input" and
# "chat_history" are the prompt variables, "answer" is the field read from
# the response.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    history_messages_key="chat_history",
    input_messages_key="input",
    output_messages_key="answer",
)

# Invoke Conversational RAG Chain with Example Questions


In [23]:


# The session ID is passed through the run config; "101" identifies this conversation
session_config = {"configurable": {"session_id": "101"}}
user_turn = {"input": "Can you explain about the synthesise of sodium peroxoborate"}

response = conversational_rag_chain.invoke(user_turn, config=session_config)

# Show only the generated answer from the response
response["answer"]


"The synthesis of sodium peroxoborate involves the reaction between sodium metaborate (NaBO2), hydrogen peroxide (H2O2), and water (H2O) to form sodium peroxoborate, which has the chemical formula Na2[(OH)2B(O−O)2B(OH)2]⋅6H2O. This reaction is represented by the equation:\n\n2NaBO2 + 2H2O2 + 6H2O ⟶ Na2[(OH)2B(O−O)2B(OH)2]⋅6H2O\n\nThe experimental procedure for synthesizing sodium peroxoborate involves preparing a solution of sodium metaborate by dissolving borax (Na2B4O7·10H2O) and sodium hydroxide in warm distilled water, then adding hydrogen peroxide. The mixture is cooled in ice, and the reaction takes place to form sodium peroxoborate.\n\nIf the crystals of sodium peroxoborate do not appear after 30 minutes, the solution can be seeded with a seed crystal to promote crystallization. The dissociation constant for sodium peroxoborate monohydrate has been measured to be K=0.03 mol/L, indicating significant dissociation of peroxoborate at equilibrium. This suggests that the synthesis pr