# **Conversational RAG Application** with LangChain and OpenAI LLM

In [1]:
# Install the necessary packages
!pip install langchain -qU
!pip install langchain-openai -qU
!pip install langchain-chroma -qU
!pip install langchain_community -qU

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.0 MB[0m [31m9.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.0/1.0 MB[0m [31m18.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.0/413.0 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencie

In [2]:
import os
from google.colab import userdata

### Initialize OpenAI LLM

In [3]:
from langchain_openai import ChatOpenAI

# Make the OpenAI key available through the environment; the value is looked up
# via google.colab's userdata under the same name
os.environ.update({'OPENAI_API_KEY': userdata.get('OPENAI_API_KEY')})

# Chat model shared by every chain built in this notebook (temperature pinned to 0)
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

### Initialize Embedding Model

In [4]:
from langchain_openai import OpenAIEmbeddings
# Embedding model handed to the vector store when the document chunks are indexed
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

### Load PDF Document

In [5]:
!pip install pypdf -qU

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/300.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/300.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m300.7/300.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
from langchain_community.document_loaders import PyPDFLoader

# Point a PDF loader at the CV file uploaded to the Colab workspace
loader = PyPDFLoader(
    file_path="/content/Dilshan Perera - CV.pdf",
)

# Read the file into a list of documents
docs = loader.load()

In [7]:
# Sanity check: how many documents the loader produced
len(docs)

2

### Split Documents into Chunks

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configuration: chunks of at most 400 units with a 50-unit overlap
# between neighbours (units are whatever the splitter's length function counts;
# presumably characters by default)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=400,
)

# Break the loaded documents into retrieval-sized chunks
splits = text_splitter.split_documents(docs)

In [9]:
# Sanity check: how many chunks the splitter produced
len(splits)

22

### Create Vector Store and Retriever

In [10]:
from langchain_chroma import Chroma

# Create a vector store from the document chunks.
#
# No persist directory or collection name is given, so Chroma uses its default
# in-memory collection, which stays alive for the whole kernel session. With
# auto-generated ids, re-running this cell would insert every chunk a second
# time and the retriever would start returning duplicates. Stable,
# position-based ids turn the insert into an upsert, so the cell is idempotent.
# NOTE(review): if a later run produces fewer chunks, higher-numbered ids from
# the previous run remain in the collection until the kernel is restarted.
chunk_ids = [f"chunk-{index}" for index in range(len(splits))]
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
    ids=chunk_ids,
)

In [11]:
# Expose the vector store through the retriever interface (default settings)
# so the chains below can fetch chunks for a question
retriever = vectorstore.as_retriever()

### Define Prompt Template

In [12]:
from langchain_core.prompts import ChatPromptTemplate

# System instructions followed, after a blank line, by the {context} slot
# that the documents chain fills with the retrieved chunks
system_prompt = "\n\n".join(
    [
        "You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know.",
        "{context}",
    ]
)

# Chat prompt: system instructions first, then the user's question as {input}
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [13]:
# Display the template's repr to inspect its input variables and messages
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

### Create Retrieval-Augmented Generation (RAG) Chain

In [14]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering step: passes the retrieved documents and the question to the LLM
# through the prompt defined above
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve chunks for the input, then run the answering step
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=qa_chain,
)

### Invoke RAG Chain with Example Questions

In [15]:
# Ask the RAG chain a question; the cell displays only the generated answer
response = rag_chain.invoke(
    {"input": "who is this?"},
)
response["answer"]

'The information provided seems to be a combination of two different individuals. The first part is about Dilshan Perera, including contact details and professional summary. The second part is about Dr. Lakmini Abeywardhana, a lecturer at SLIIT, with her contact information and professional experience.'

In [18]:
# A question whose answer is not expected to be in the document
response = rag_chain.invoke(
    {"input": "what is the company currently working on?"},
)
response["answer"]

"I don't have real-time information on the company's current projects."

In [19]:
# Factual lookup question against the document
response = rag_chain.invoke(
    {"input": "what is the univercity name he studied?"},
)
response["answer"]

'The individual studied at the Sri Lanka Institute of Information Technology (SLIIT).'

In [20]:
# Ask for a list-style answer
response = rag_chain.invoke(
    {"input": "can you list down main languages which he knows?"},
)
response["answer"]

'Dilshan Perera is proficient in the following languages and technologies:\n\n1. Python\n2. R\n3. Kafka'

## Add Chat History

In [21]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Instruction for the query-rewriting step that runs before retrieval
contextualize_system_prompt = "using chat history and the latest user question, just reformulate question if needed and otherwise return it as is"

# Rewriting prompt: instruction, then the prior turns, then the new question
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever that takes the chat history into account via the rewriting prompt
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_prompt,
)

#### Create History-Aware RAG Chain

In [22]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# NOTE: this cell rebinds `system_prompt` and `prompt` from the earlier
# single-turn section; from here on `prompt` also requires `chat_history`.
system_prompt = "\n\n".join(
    [
        "You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know.",
        "{context}",
    ]
)

# Same layout as the single-turn prompt, with the prior turns inserted between
# the system instructions and the new question
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

prompt

ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [23]:
# Answering step rebuilt on the history-aware prompt (rebinds `qa_chain`)
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# RAG pipeline that retrieves through the history-aware retriever
# (rebinds `rag_chain`, replacing the single-turn chain)
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=qa_chain,
)

#### Manage Chat Session History

In [24]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object registered in ``store`` under ``session_id``; a new
        empty ``ChatMessageHistory`` is registered when none exists yet.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session id is seen: register a fresh history
        history = store[session_id] = ChatMessageHistory()
        return history

# Wrap the history-aware RAG chain with per-session message history:
# the user's text is read from "input", prior turns are supplied as
# "chat_history", and the chain's "answer" is taken as the reply message
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    output_messages_key="answer",
    history_messages_key="chat_history",
    input_messages_key="input",
)

#### Invoke Conversational RAG Chain with Example Questions

In [26]:
# Turn in session "101"; the cell displays only the answer text
response = conversational_rag_chain.invoke(
    {"input": "who is this details?"},
    config={
        "configurable": {"session_id": "101"},
    },
)
response["answer"]

'The details provided in the context describe a professional with expertise in various technologies and skills related to data science, machine learning, data warehousing, business intelligence, and cloud services like AWS. The individual is passionate about Generative AI, NLP, and Computer Vision, and has hands-on experience with Python, Kafka, Airflow, and AWS. They are adept at transforming raw data into actionable insights, optimizing database management, and driving innovation through AI-powered applications. Additionally, they have experience with technologies like YOLO, Python, Mobile Development, IoT, and AWS Redshift.'

In [27]:
# Inspect the in-memory session store (session id -> recorded message history)
store

{'101': InMemoryChatMessageHistory(messages=[HumanMessage(content='what is that pdf?', additional_kwargs={}, response_metadata={}), AIMessage(content="I'm sorry, but based on the context provided, there is no specific mention of a PDF document. If you can provide more details or context, I might be able to help you better.", additional_kwargs={}, response_metadata={}), HumanMessage(content='who is this details?', additional_kwargs={}, response_metadata={}), AIMessage(content='The details provided in the context describe a professional with expertise in various technologies and skills related to data science, machine learning, data warehousing, business intelligence, and cloud services like AWS. The individual is passionate about Generative AI, NLP, and Computer Vision, and has hands-on experience with Python, Kafka, Airflow, and AWS. They are adept at transforming raw data into actionable insights, optimizing database management, and driving innovation through AI-powered applications. 

In [28]:
# Follow-up in the same session ("101"), so earlier turns are available as history
response = conversational_rag_chain.invoke(
    {"input": "who's that?"},
    config={
        "configurable": {"session_id": "101"},
    },
)
response["answer"]

'The details provided in the context describe a professional with expertise in data science, machine learning, and business intelligence. They are skilled in Python, Kafka, Airflow, AWS, data visualization tools like Power BI and Looker, database management systems like MySQL, MS SQL, and PostgreSQL, as well as ETL tools like AWS Glue. They are passionate about impactful solutions and have experience in designing and implementing scalable data architectures, predictive models, and automation workflows.'

In [29]:
# Another turn in session "101"; "he" has to be resolved from the chat history
response = conversational_rag_chain.invoke(
    {"input": "what are the companies he worked?"},
    config={
        "configurable": {"session_id": "101"},
    },
)
response["answer"]

'Based on the information provided in the context, the individual worked at Apple Holidays (Pvt) Ltd in Colombo. They held the positions of Associate Data Analyst from February 2024 to July 2024 and Associate Database Engineer from July 2024 to the present.'

In [30]:
# Deliberately incomplete request in session "101"; it only makes sense
# together with the preceding turn stored in the history
response = conversational_rag_chain.invoke(
    {"input": "can you list down"},
    config={
        "configurable": {"session_id": "101"},
    },
)
response["answer"]

'Sure! Here are the companies the individual worked at based on the information provided:\n\n1. Apple Holidays (Pvt) Ltd, Colombo\n   - Position: Associate Data Analyst\n   - Duration: February 2024 – July 2024\n\n2. Apple Holidays (Pvt) Ltd, Colombo\n   - Position: Associate Database Engineer\n   - Duration: July 2024 – Present'