## Conversational RAG

In [1]:
#installing the necessary packages

!pip install langchain -qU
!pip install openai -qU
!pip install langchain-chromadb -qU
!pip install langchain_community -qU

In [2]:
pip install openai

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from openai import OpenAI

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the API key and fail fast with an actionable message when it is missing,
# instead of letting the error surface later inside the client.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your environment or to a .env file "
        "next to this notebook before running the remaining cells."
    )

# Initialize the OpenAI client with the key read above.
client = OpenAI(api_key=api_key)

# Smoke test: send one short chat message to confirm the key and connection work.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, OpenAI!"}],
)

print(response.choices[0].message.content)

Hello! How can I assist you today?


In [4]:
# Chat model handed to the question-answering chain further down; temperature is set to 0.0.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.0,
)


### Initialize the Embedding Model

In [5]:
from langchain_openai import OpenAIEmbeddings

# Embedding model that is passed to the Chroma vector store when it is built.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

### Load PDF Document

In [6]:
!pip install pypdf -qU
     

In [11]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF document; the path is relative to the notebook's working directory.
loader = PyPDFLoader("Proposal.pdf")

# `docs` holds the Document objects returned by the loader.
docs = loader.load()


In [12]:
# Number of Document objects returned by loader.load()
len(docs)

5

In [13]:
# Preview the raw text of the loaded document at index 3
docs[3].page_content

'8. Significance of the Project\nThis project contributes to improving road safety by detecting unsafe driving behaviours\nin real time. Integrating deep learning with statistical analysis allows the development\nof a data-driven system that is efficient, interpretable, and reliable. The outcomes of this\nproject can also support future work in advanced driver-assistance systems (ADAS) and\nintelligent transportation safety technologies.\n9. Expected Challenges\n•Limited dataset or class imbalance.\n•Difficulty in distinguishing similar facial postures.\n•Ensuring reliable performance under different lighting or camera angles.\n•Optimizing the model for real-time performance.\n10. Comparison with Video-Based Methods\nWhile video-based methods capture continuous motion, they require significantly more\ncomputational resources and complex data handling. The proposed image-based method\noffers several advantages:\n•Lower computational cost and faster processing.\n•Simpler dataset preparat

### Split Documents into Chunks

In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters passed to the splitter.
CHUNK_SIZE = 400
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split the loaded documents into chunks; these are what the vector store indexes.
splits = text_splitter.split_documents(docs)


In [15]:
# Number of chunks produced by text_splitter.split_documents(docs)
len(splits)

20

### Create Vector Store and Retriever

In [16]:
from langchain_chroma import Chroma

# Build a Chroma vector store from the chunks, using `embedding_model` for the embeddings.
# NOTE(review): no persist directory is passed, so the index is presumably in-memory only — confirm.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
)


In [17]:
# Create a retriever from the vector store.
# No arguments are passed, so as_retriever() runs with its default search settings.
retriever = vectorstore.as_retriever()

### Define Prompt Template

In [18]:
from langchain_core.prompts import ChatPromptTemplate

# System message: fixed instructions, a blank line, then the {context} placeholder.
system_prompt = (
    "You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know."
    "\n\n"
    "{context}"
)

# Message sequence for the chat prompt: the system instructions, then the user's {input}.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]

# The resulting template takes two variables: `context` and `input`.
prompt = ChatPromptTemplate.from_messages(chat_messages)
     


In [19]:
# Display the assembled template to check its input variables and message structure
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an intelligent chatbot. Use the following context to answer the question. If you don't know the answer, just say that you don't know.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

### Create RAG Chain

In [20]:
# NOTE(review): the install cell upgrades langchain without a version pin; newer major
# releases reportedly move these helpers out of `langchain.chains` — confirm the installed
# version still exposes them before relying on a fresh-environment run.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Question-answering step: combines the chat model with the prompt template.
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever feeds the question-answering step.
rag_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=qa_chain)

### Invoke RAG Chain with Example Questions

In [21]:
# Run one example question through the RAG chain and show only the answer text.
question = "who is codeprolk"
response = rag_chain.invoke({"input": question})
response["answer"]
     

'I don\'t have information on specific individuals or usernames unless they are mentioned in the provided context. If "codeprolk" is not related to the content provided, I don\'t have any information about them.'