## Testing RAG with RAGAs

### Setup LLM

In [1]:
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI

my_llm = ChatOllama(
    base_url="http://localhost:11434",
    model="llama3.1:8b",
    temperature=0.6,
    max_tokens=300
)

openai_llm = ChatOpenAI(
    model="gpt-4o"
)

load_dotenv('./../.env')

True

### Setup RAG & Retriever

In [None]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vector_store = Chroma(
    persist_directory='./../Section_10_IntroToHuggingFace/chroma_db_hf',
    embedding_function=embeddings
)

# retriever = vector_store.as_retriever(search_type='similarity', search_kwargs = {'k': 5})

# response = retriever.invoke("Full name of Parveen ?")

# for doc in response:
#     print(doc.metadata["source"])

### Setup RetrievalQA

In [3]:
from langchain.chains import RetrievalQA

retriever = vector_store.as_retriever(search_kwargs={'k':3})

qa_chain = RetrievalQA.from_chain_type(llm=my_llm, retriever=retriever)

query = "What is full name of Dhanesh ?"

response = qa_chain.invoke(query)
retrieved_docs = retriever.get_relevant_documents(query)

response['result']

  retrieved_docs = retriever.get_relevant_documents(query)


'The full name of the person in question is GUJRATHI DHANESH NARESH.'

### Creating MultiShotSample Dataset for RAGAs 📈📉

In [None]:
# Reference for below implementation - 
# https://docs.ragas.io/en/stable/concepts/components/eval_dataset/#creating-an-evaluation-dataset-from-singleturnsamples

questions = [
    "What is full name of Dhanesh ?",
    "What is full name of Parveen ?",
    "Which organisation applied for the employment pass of Dhanesh in 2019 ?",
    "Which organisation applied for the employment pass of Parveen in 2024 ?",
    "Which organisation applied for the employment pass of Dhanesh in 2020 ?"
]

dataset = []

all_docs = vector_store.get(include=["documents"])["documents"]

for question, doc in zip(questions, all_docs):
    relevant_docs = [doc.page_content for doc in retriever.invoke(question)]
    response = qa_chain.invoke(question)
    
    dataset.append({
        "user_input": question,
        "retrieved_contexts": relevant_docs,
        "response": response['result'],
        "reference": doc
    })

dataset

### Evaluation of RAG data by RAGAs

In [8]:
from ragas import EvaluationDataset

evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, LLMContextRecall, ContextPrecision, AnswerRelevancy

load_dotenv(override=True)
load_dotenv('./../.env')

# eval_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
eval_llm = LangchainLLMWrapper(ChatOpenAI(model=""))

# openai_llm.invoke("Singapore avergae salary for IT jobs?")

output = evaluate(dataset=evaluation_dataset, 
                  metrics=[LLMContextRecall(), Faithfulness(), ContextPrecision(), AnswerRelevancy()], 
                  llm=eval_llm)

output

Evaluating: 100%|██████████| 20/20 [00:31<00:00,  1.58s/it]


{'context_recall': 0.3386, 'faithfulness': 0.9500, 'context_precision': 0.4167, 'answer_relevancy': 0.8890}

In [None]:
output.to_pandas()