### Import modules

In [1]:
import os

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader

from dotenv import load_dotenv

import pandas as pd

In [2]:
# Switch the working directory to the parent of whatever directory was current
# when this cell first ran, so that the relative paths used further down
# ("chroma", "data/evaluation_set.csv") and the `src` package resolve from there.
# The starting directory is remembered on the first execution; a plain relative
# os.chdir("../") would climb one more level every time the cell is re-run.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, "../"))

### Load the existing Chroma instance

In [3]:
# Open the Chroma store kept under the relative "chroma" directory, with an
# OpenAIEmbeddings instance as its embedding function.
# NOTE(review): `load_dotenv` is imported at the top of the notebook but is not
# called in any visible cell — presumably the OpenAI credentials are already
# present in the environment; confirm.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="chroma",
)

#### Create retriever

In [4]:
# Expose the vector store through the retriever interface, passing k=3 as the
# search setting (the sample query in the next cell returns three documents).
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

In [5]:
# Quick manual check: run a generic query through the retriever and let the
# notebook display the documents it returns.
retriever.invoke(
    "What is the article about?"
)

[Document(page_content='of news stories about a really sensitive issue that\'s been very difficult for either party to solve."'),
 Document(page_content='and I" (DW) This year, the jury chose two winners in the category Online: "Digital journalism has a lot to offer. The two prizewinners represent a different approach in an interesting way and show how journalism generally evolves with multimedia possibilities," explains the jury. In the first contribution, Christian Salewski und Felix Rohrbeck track the disposal of electronic scrap in Germany and find out that it isn\'t always legal and fair. In the second contribution, a group of Deutsche Welle trainees asked their grandmothers from Belarus, Brazil, Chile, China, Kenya and Germany about their'),
 Document(page_content='is, in journalism, if we gather the "facts," we can usually find the answers to what we\'re looking for.  When it comes to God, Jesus and the Holy Spirit, those answers rest in faith. As a journalist, I seek intellectu

In [6]:
# Chat model handed to the RAG chain below; constructed with the "gpt-4o"
# model name and temperature=0.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-4o",
)

#### Create a RAG chain

In [7]:
# Assemble the question-answering chain from the chat model and the retriever,
# using the "stuff" chain type. return_source_documents=True is requested; the
# disabled cell further down reads result["source_documents"].
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

#### Test the RAG chain

In [8]:
# Ad-hoc sample question for trying out the chain by hand.
question: str = "Who bit Jon Huntsman in 2011"
# The chain call itself is currently disabled:
# result = rag_chain.invoke(question)

## Evaluating the RAG system using RAGAS

In [9]:
# contexts = [doc.page_content for doc in result["source_documents"]]
# formatted_context = pretty_print_docs(contexts)

In [10]:
from src.ragas.ragas_pipeline import get_context_and_answer
from src.ragas.ragas_utils import load_evaluation_data

In [11]:
# Read the evaluation set from the CSV under data/ via the project helper.
# The printed output in the next cell shows 'questions' and 'ground_truths' entries.
eval_data = load_evaluation_data("data/evaluation_set.csv")

In [12]:
# Print the loaded evaluation set for inspection.
# NOTE(review): this dumps every question and ground truth into the cell
# output; consider previewing only the first few entries per field instead.
print(eval_data)

{'questions': ['What upcoming animated project will feature Adam West and Burt Ward reprising their roles as Batman and Robin?', 'What animated project did Adam West and Burt Ward announce at the Mad Monster Party in Charlotte, NC?', 'What event is Rory McIlroy preparing for after the WGC-Cadillac Championship?', 'How did Donald Trump help Rory McIlroy retrieve his golf club?', 'What caused the collapse of the Iraqi army during the ISIS offensive in 2014?'], 'ground_truths': ["Adam West and Burt Ward will be reprising their roles as Batman and Robin in an upcoming animated 'Batman' full length, 90-minute feature, which will be released on the 50th anniversary of the 1960s Batman TV series.", "Adam West and Burt Ward announced a new animated 'Batman' full length, 90-minute feature at the Mad Monster Party in Charlotte, NC.", 'Rory McIlroy is preparing for the U.S. Masters at Augusta next month.', 'Donald Trump helped Rory McIlroy retrieve his golf club by getting a scuba diver to retrie

In [13]:
# Pick the first question and its matching ground truth from the evaluation
# set for a single-sample trial run (this rebinds `question`).
question = eval_data["questions"][0]
ground_truth = eval_data["ground_truths"][0]

In [14]:
# Wrap the single sample in one-element lists under the same two keys that
# the full evaluation set uses.
test_eval = dict(
    questions=[question],
    ground_truths=[ground_truth],
)

In [15]:
# Pass the one-sample evaluation dict and the RAG chain to the project
# pipeline helper. Presumably it collects the retrieved context and generated
# answer per question — verify against src.ragas.ragas_pipeline.
test_data = get_context_and_answer(
    rag_chain=rag_chain,
    evaluation_data=test_eval,
)

In [17]:
# Turn the pipeline output into a DataFrame and display its first rows.
# NOTE(review): `test_data` is rebound here from the helper's return value to a
# DataFrame; a separate name would keep the raw result available — confirm
# nothing downstream relies on the rebinding before changing it.
test_data = pd.DataFrame(data=test_data)
test_data.head(n=5)

Unnamed: 0,question,context,answer,ground_truth
0,What upcoming animated project will feature Ad...,[(The Hollywood Reporter)The skies over Gotham...,The upcoming animated project that will featur...,Adam West and Burt Ward will be reprising thei...
