### Import modules

In [1]:
import os
import pandas as pd

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA

# Move the working directory up to the project root so that the `src` imports
# that follow can resolve (assumes the notebook is launched from a directory
# directly below the project root -- TODO confirm).
# The guard keeps this cell idempotent: an unconditional chdir("../") climbs one
# more level every time the cell is re-run, which then breaks both the `src`
# imports and the relative "chroma" path used when loading the vector store.
if not os.path.isdir("src"):
    os.chdir("../")
from src.ragas.ragas_utils import run_evaluation, upload_csv_dataset_to_langsmith
from src import display_df



### Load the existing Chroma instance

In [3]:
# Embedding function handed to the vector store below.
embeddings = OpenAIEmbeddings()

# Open the existing Chroma store persisted under the relative "chroma" directory.
vectorstore = Chroma(
    persist_directory="chroma",
    embedding_function=embeddings,
)

#### Create retriever

In [4]:
# Expose the vector store as a retriever; k=3 is forwarded as a search kwarg
# (presumably the number of documents returned per query -- confirm against
# the Chroma retriever documentation).
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3},
)

In [6]:
# Chat model used by the RAG chain: gpt-4o with temperature fixed at 0.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
)

#### Create a RAG chain

In [7]:
# Build the retrieval QA chain from the LLM and retriever defined above.
# chain_type="stuff" selects the documents chain used to combine retrieved text
# with the question; return_source_documents=True asks the chain to also hand
# back the retrieved documents (presumably alongside the answer -- confirm).
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [35]:
# Display the chain's repr to inspect its prompt template, LLM and retriever settings.
rag_chain

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7f200c599c30>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7f200c59b550>, model_name='gpt-4o', temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='')), document_variable_name='context'), return_source_documents=True, retriever=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstore

## Evaluating the RAG system using RAGAS

### Run the evaluation

In [14]:
# Evaluate the RAG chain with the project's `run_evaluation` helper
# (imported from src.ragas.ragas_utils). The returned object is converted to a
# DataFrame with `.to_pandas()` in the cells that follow.
result = run_evaluation(rag_chain)

Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

In [15]:
# Preview the first rows of the evaluation results as a DataFrame.
result.to_pandas().head()

Unnamed: 0,question,contexts,answer,ground_truth,answer_correctness,faithfulness,answer_relevancy,context_precision
0,What upcoming animated project will feature Ad...,[(The Hollywood Reporter)The skies over Gotham...,The upcoming animated project that will featur...,Adam West and Burt Ward will be reprising thei...,0.806599,1.0,0.0,1.0
1,What animated project did Adam West and Burt W...,[(The Hollywood Reporter)The skies over Gotham...,Adam West and Burt Ward announced an upcoming ...,Adam West and Burt Ward announced a new animat...,0.842686,1.0,0.0,1.0
2,What event is Rory McIlroy preparing for after...,[for 2014-15. He hurt his shoulder in December...,Rory McIlroy is preparing for Arnold Palmer's ...,Rory McIlroy is preparing for the U.S. Masters...,0.233269,0.5,0.863586,0.833333
3,How did Donald Trump help Rory McIlroy retriev...,[(CNN)With a little bit of help from Donald Tr...,"Donald Trump, the owner of the Blue Monster co...",Donald Trump helped Rory McIlroy retrieve his ...,0.613383,1.0,0.893972,1.0
4,What caused the collapse of the Iraqi army dur...,"[of 2014. ISIS used speed, discipline and ruth...",The collapse of the Iraqi army during the ISIS...,The collapse of the Iraqi army during the ISIS...,0.770911,1.0,0.975391,1.0


### Upload the evaluation set to LangSmith


In [23]:
# Name and description passed to the project's LangSmith upload helper.
dataset_name = "cnn_dailymail_testset"
dataset_desc = "Synthetic testset data for Huggingface CNN Dailymail dataset."

# Upload via the helper imported from src.ragas.ragas_utils and keep its return value.
dataset = upload_csv_dataset_to_langsmith(
    dataset_name=dataset_name,
    dataset_desc=dataset_desc,
)

Created a new dataset 'cnn_dailymail_testset'. Dataset is accessible at https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/920a9200-8c8c-46e5-a629-73bb91acaff9


In [34]:
# Keep only the metric score columns: drop the index artifact and the text
# columns from the evaluation results before displaying them.
results_df = result.to_pandas()
metrics_scores = results_df.drop(
    columns=[
        "Unnamed: 0",
        "question",
        "contexts",
        "answer",
        "ground_truth",
    ]
)

display_df(metrics_scores)

| answer_correctness   | faithfulness   | answer_relevancy   | context_precision   |
|:---------------------|:---------------|:-------------------|:--------------------|
| 0.806599             | 1              | 0                  | 1                   |
| 0.842686             | 1              | 0                  | 1                   |
| 0.233269             | 0.5            | 0.863586           | 0.833333            |
| 0.613383             | 1              | 0.893972           | 1                   |
| 0.770911             | 1              | 0.975391           | 1                   |
