### Import modules

In [1]:
import os
import pandas as pd

# Switch to the project root so the `src` package imports and the relative
# `data/...` paths used further down resolve.
# Guarded: a bare relative chdir is not idempotent -- re-running this cell would
# climb two more levels each time. Once `src/` is visible from the current
# directory we are already at the root and must not move again.
if not os.path.isdir("src"):
    os.chdir("../../")

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader
# from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv

In [2]:
from src.rag_pipeline import chunk_by_recursive_split, RAGSystem
from src.env_loader import load_api_keys
from src.ragas.ragas_pipeline import run_ragas_evaluation
from src import display_df

### Load API keys

In [3]:
# Fetch the OpenAI API key through the project's loader so the secret itself never
# appears in the notebook.
# NOTE(review): presumably read from the environment / a .env file -- confirm in
# src.env_loader.
openai_api_key = load_api_keys("OPENAI_API_KEY")

#### Initialize embeddings - OpenAI `text-embedding-ada-002` (alternative, currently disabled: Hugging Face [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2))

In [5]:
# Embedding model handed to the RAG system below: OpenAI text-embedding-ada-002.
# Disabled alternative (needs the commented-out langchain_huggingface import):
#   embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
embeddings = OpenAIEmbeddings(
    model='text-embedding-ada-002',
    api_key=openai_api_key,
)

#### Initialize the RAG system with an ensemble retriever that includes a BM25 retriever

In [6]:

# Ensemble-retriever variant of the RAG system, configured with model_name "gpt-4o"
# and the embeddings created above.
# NOTE(review): the exact effect of existing_vectorstore=False / clear_store=True is
# defined in src.rag_pipeline -- presumably a fresh vector store is built; confirm.
rag_system_ensemble = RAGSystem(
    model_name="gpt-4o",
    embeddings=embeddings,
    use_ensemble_retriever=True,
    existing_vectorstore=False,
    clear_store=True,
)

In [7]:
# Build the RAG system configured above. The recorded output of this cell reports
# the source documents being split into chunks, so expect this step to take a while.
rag_system_ensemble.initialize()

--Split 1000 documents into 5030 chunks.--


### Check the RAG system
**TODO:** Write a test that asserts on the output to verify that the RAG system is working properly.

In [8]:
# Smoke-test the chain with a single question before running the full evaluation.
question = "Who was one of Putin's harshest critics?"
result = rag_system_ensemble.rag_chain.invoke(question)

# Structural sanity checks only: the generated wording is not deterministic, so we
# validate the shape of the response (question / answer / contexts) rather than
# its exact text. Failing here is cheaper than failing midway through the
# evaluation run that follows.
assert {"question", "answer", "contexts"} <= set(result), "unexpected keys in RAG output"
assert result["question"] == question, "chain did not echo the input question"
assert isinstance(result["answer"], str) and result["answer"].strip(), "empty answer"
assert result["contexts"], "no contexts were retrieved"

result

{'question': "Who was one of Putin's harshest critics?",
 'answer': "Boris Nemtsov was one of Putin's harshest critics.",
 'contexts': ['Moscow (CNN)In his first substantive comments since Kremlin critic Boris Nemtsov\'s death, Russian President Vladimir Putin on Wednesday called the killing a "disgrace" and lashed out at what he called "extremists" and protesters. Nemtsov had been one of Putin\'s harshest critics and had been arrested several times for speaking against the President\'s government. The 55-year-old opposition leader was gunned down Friday night in Moscow as he walked across a bridge about 100 meters (330 feet) from the Kremlin with his girlfriend, Ukrainian model Anna Duritskaya, 23. His slaying spurred thousands to rally in his honor in Moscow, with many calling him a true Russian patriot at his funeral Tuesday. Nemtsov isn\'t the first of Putin\'s critics to turn up dead, with others including Anna Politkovskaya (who was fatally shot) and Alexander Litvinenko (who was

## RAGAS Pipeline testing the rag_chain

In [10]:
# Evaluate the ensemble chain with the project's RAGAS pipeline. With
# save_results=True the pipeline also persists the results itself (the recorded
# output ends with "--RESULTS SAVED--").
rag_results = run_ragas_evaluation(
    rag_chain=rag_system_ensemble.rag_chain,
    experiment_name="ensemble_retriever_with_bm25",
    save_results=True,
)


--LOADING EVALUATION DATA--
--EVALUATING LOCALLY--
--GETTING CONTEXT AND ANSWERS--


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

--EVALUATION COMPLETE--
--RESULTS SAVED--


In [11]:
# Save results to csv
# The original f-string referenced `optimization_no` and `optimization_name`, which
# are never defined in this notebook, so the cell raised NameError on a fresh
# kernel. Name the file after the experiment evaluated above instead.
experiment_name = "ensemble_retriever_with_bm25"
results_dir = os.path.join("data", "evaluation_results")
# Create the target directory up front so the write does not fail when it is missing.
os.makedirs(results_dir, exist_ok=True)
rag_results.to_csv(os.path.join(results_dir, f"bm_{experiment_name}.csv"))