# Vector RAG Pipeline
1. QA pipeline with LangChain
2. Persistent vector storage based on ChromaDB
3. Evaluation with ragas

In [1]:
import os
from dotenv import load_dotenv

import huggingface_hub
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
if os.path.exists('private-config.env'):
    load_dotenv('private-config.env', override=True)

In [3]:
# hf_token = os.getenv('HF_TOKEN')
# # Use the token with the Hugging Face library
# huggingface_hub.login(token=hf_token)

In [4]:
# from langchain_huggingface import HuggingFaceEmbeddings

# model_name = "sentence-transformers/all-mpnet-base-v2"
# model_kwargs = {"device": "cpu"}
# encode_kwargs = {"normalize_embeddings": False}
# embeddings = HuggingFaceEmbeddings(
#     model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
# )

In [5]:
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

embeddings = OpenAIEmbeddings()

In [6]:
# Test LLM
response = llm.invoke("Hello, how are you?")
response

AIMessage(content="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 13, 'total_tokens': 43, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_b376dfbbd5', 'id': 'chatcmpl-BKcVyJc26L3xvcgaVJBd77zD5ms7C', 'finish_reason': 'stop', 'logprobs': None}, id='run-933492c3-a745-4bde-b19c-7195b50e6122-0', usage_metadata={'input_tokens': 13, 'output_tokens': 30, 'total_tokens': 43, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [7]:
sample_docs = [
    "Albert Einstein proposed the theory of relativity, which transformed our understanding of time, space, and gravity.",
    "Marie Curie was a physicist and chemist who conducted pioneering research on radioactivity and won two Nobel Prizes.",
    "Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics.",
    "Charles Darwin introduced the theory of evolution by natural selection in his book 'On the Origin of Species'.",
    "Ada Lovelace is regarded as the first computer programmer for her work on Charles Babbage's early mechanical computer, the Analytical Engine."
]

In [8]:
documents = [Document(page_content=doc) for doc in sample_docs]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
docstore = text_splitter.split_documents(documents)

# Create ChromaDB and store embeddings
db = Chroma.from_documents(docstore, embeddings, persist_directory="./chroma_db")

# Create a retriever
retriever = db.as_retriever(search_kwargs={"k": 5})

In [15]:
type(retriever)

langchain_core.vectorstores.base.VectorStoreRetriever

In [9]:
# Querying ChromaDB to Get Context
query = "What is the key takeaway from the document?"
retrieved_docs = retriever.invoke(query)
retrieved_context = "\n\n".join([doc.page_content for doc in retrieved_docs])
print("Retrieved Context:\n", retrieved_context)

Retrieved Context:
 'On the Origin of Species'.

the foundation for classical mechanics.

of time, space, and gravity.

and won two Nobel Prizes.

Albert Einstein proposed the theory of relativity, which transformed our understanding of time,


In [10]:
# Define an LLM and Generate Response Using Context
prompt = f"""
Based on the following context, answer the question:\n
Context: \n
{retrieved_context}\n
Question: {query}\n
Answer:
"""
response = llm.invoke(prompt)
print("\nGenerated Response:\n", response.content)


Generated Response:
 The key takeaway from the document is that significant scientific advancements, such as Darwin's theory of evolution in "On the Origin of Species" and Einstein's theory of relativity, have fundamentally transformed our understanding of natural phenomena, including the principles of classical mechanics and the concepts of time, space, and gravity.


In [11]:
sample_queries = [
    "Who introduced the theory of relativity?",
    "Who was the first computer programmer?",
    "What did Isaac Newton contribute to science?",
    "Who won two Nobel Prizes for research on radioactivity?",
    "What is the theory of evolution by natural selection?"
]

expected_responses = [
    "Albert Einstein proposed the theory of relativity, which transformed our understanding of time, space, and gravity.",
    "Ada Lovelace is regarded as the first computer programmer for her work on Charles Babbage's early mechanical computer, the Analytical Engine.",
    "Isaac Newton formulated the laws of motion and universal gravitation, laying the foundation for classical mechanics.",
    "Marie Curie was a physicist and chemist who conducted pioneering research on radioactivity and won two Nobel Prizes.",
    "Charles Darwin introduced the theory of evolution by natural selection in his book 'On the Origin of Species'."
]

In [12]:
from tqdm import tqdm
dataset = []

for query,reference in tqdm(zip(sample_queries,expected_responses)):

    relevant_docs = retriever.invoke(query)
    relevant_context = "\n".join([doc.page_content for doc in relevant_docs])
    prompt = f"Based on the following context, answer the question:\n\n{relevant_context}\n\nQuestion: {query}\nAnswer:"
    response = llm.invoke(prompt).content
    dataset.append(
        {
            "user_input":query,
            "retrieved_contexts":[d.page_content for d in relevant_docs],
            "response":response,
            "reference":reference
        }
    )

5it [00:09,  1.99s/it]


In [13]:
from ragas import EvaluationDataset
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, AnswerCorrectness, AnswerRelevancy, ContextRecall, ContextPrecision

evaluator_llm = LangchainLLMWrapper(llm)

metrics_list = [
    LLMContextRecall(),
    Faithfulness(),
    FactualCorrectness(),
    AnswerCorrectness(),
    AnswerRelevancy(),
    ContextRecall(),
    ContextPrecision()
]
result = evaluate(dataset=evaluation_dataset,metrics=metrics_list,llm=evaluator_llm)
result

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

{'context_recall': 1.0000, 'faithfulness': 0.9000, 'factual_correctness(mode=f1)': 0.7140, 'answer_correctness': 0.6546, 'answer_relevancy': 0.9517, 'context_precision': 0.9000}