### Import modules

In [1]:
import os
import pandas as pd

# Run from the project root so the local `src` package resolves on import.
# Guarded so the cell is idempotent: once `src/` is visible from the working
# directory (already at the root, or this cell was already run) we stay put
# instead of climbing two more levels and breaking every later import.
if not os.path.isdir("src"):
    os.chdir("../../")

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

In [2]:
from src.rag_pipeline import chunk_by_recursive_split, RAGSystem
from src.env_loader import load_api_keys
from src.ragas.ragas_pipeline import run_ragas_evaluation
from src import display_df

### Load API keys

In [3]:
# Pull the OpenAI key in through the project's env_loader helper so it is never
# hard-coded in the notebook. NOTE(review): presumably this reads the
# OPENAI_API_KEY environment variable / .env entry — confirm in src/env_loader.
openai_api_key = load_api_keys("OPENAI_API_KEY")

#### Initialize embeddings and RAG system

In [4]:
# Embedding model handed to the RAG system: OpenAI's text-embedding-ada-002.
# A Hugging Face alternative is kept here for reference:
#   HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    api_key=openai_api_key,
)

In [5]:
# Configure the RAG system: GPT-4o as the model, using the embeddings object
# created earlier in the notebook.
# NOTE(review): existing_vectorstore=False together with clear_store=True looks
# like "rebuild the vector store from scratch" — confirm against RAGSystem.
rag_system = RAGSystem(
    embeddings=embeddings,
    model_name="gpt-4o",
    existing_vectorstore=False,
    clear_store=True,
)

In [6]:
# Prepare the pipeline for use. The saved output of this cell reports the
# source documents being split into chunks, so expect this step to take a while.
rag_system.initialize()

--Split 1000 documents into 5030 chunks.--


#### Test the RAG Chain

In [7]:
# Smoke test: push a single question through the chain before running the
# full evaluation. Per the saved output, the result is a dict with
# 'question', 'answer' and 'contexts' keys.
question = "Who was one of Putin's harshest critics?"
result = rag_system.rag_chain.invoke(question)

In [8]:
# Show the raw chain output (question, answer and the retrieved context chunks).
result

{'question': "Who was one of Putin's harshest critics?",
 'answer': 'Boris Nemtsov',
 'contexts': ['be heading an opposition party and do what I\'m doing." Opinion: The complicated life and tragic death of Boris Nemtsov . Critics of Putin have in the past suffered miserable fates. Last year, a Moscow court sentenced five men to prison for the 2006 killing of Russian journalist and fierce Kremlin critic Anna Politkovskaya. Business magnate Mikhail Khodorkovsky accused Putin of corruption and spent 10 years in prison and labor camps. Late last year, Kremlin critic Alexey Navalny was found guilty of fraud in a politically charged trial. Russia\'s official news agency reported Monday that a request by Navalny to attend Nemtsov\'s funeral had been denied. And before his death, Nemtsov had been arrested several times for speaking against Putin\'s government. Kasparov, chairman of the Human Rights Foundation\'s International Council, suggested the killing was linked to the Kremlin\'s own inse

## Ragas pipeline: evaluating the `rag_chain`

### Ragas testing with LangSmith tracing

In [7]:
# LangSmith-traced evaluation, currently disabled (kept for reference).
# NOTE(review): the saved output under this cell ends with
# "AttributeError: 'TestResult' object has no attribute 'to_pandas'" and also
# logs an APIConnectionError for one run — presumably why the call is commented
# out; confirm and fix before re-enabling. The saved output also names project
# 'baseline_rag_benchmark', which differs from the experiment name below, so it
# appears to come from an earlier version of this cell.
# experiment_name = "baseline_rag_benchmark_1"
# dataset_name = "cnn_dailymail_evaluation"

# rag_results = run_ragas_evaluation(
#   rag_chain=rag_system.rag_chain,
#   use_langsmith=True,
#   experiment_name=experiment_name,
#   dataset_name=dataset_name,
#   upload_dataset_to_langsmith=True,
#   save_results=True
# )

--LOADING EVALUATION DATA--
--GETTING CONTEXT AND ANSWERS--
--USING LANGSMITH FOR EVALUATION--
Created a new dataset 'cnn_dailymail_evaluation'. Dataset is accessible at https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
View the evaluation results for project 'baseline_rag_benchmark' at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6/compare?selectedSessions=a58cdd46-9bf6-44ae-9ea4-f0853631205f

View all tests for Dataset cnn_dailymail_evaluation at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
[------------>                                     ] 5/19

Error evaluating run f591f3a5-4864-48c3-ac91-409ab305f428 with EvaluatorChain: APIConnectionError('Connection error.')
Traceback (most recent call last):
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/openai/_base_client.py", line 1558, in _request
    response = await self._client.send(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1661, in send
    response = await self._send_handling_auth(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1689, in _send_handling_auth
    response = await self._send_handling_redirects(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1726, in _send_handling_redirects
    response =

[------------------------------------------------->] 19/19

Unnamed: 0,feedback.answer_correctness,feedback.faithfulness,feedback.answer_relevancy,feedback.context_precision,error,execution_time,run_id
count,19.0,18.0,18.0,18.0,0.0,19.0,19
unique,,,,,0.0,,19
top,,,,,,,31f949c4-1476-4eb2-ae11-f23eb62af6d3
freq,,,,,,,1
mean,0.706439,0.851852,0.887768,0.965509,,2.434766,
std,0.20325,0.24347,0.225174,0.083576,,0.693174,
min,0.229624,0.25,0.0,0.679167,,1.334236,
25%,0.579877,0.6875,0.918437,1.0,,2.05128,
50%,0.743723,1.0,0.934425,1.0,,2.481985,
75%,0.832633,1.0,0.963321,1.0,,2.726066,


--EVALUATION COMPLETE--


AttributeError: 'TestResult' object has no attribute 'to_pandas'

### Run Ragas tests locally

In [10]:
# Evaluate the chain without any LangSmith arguments (the saved log reports
# "EVALUATING LOCALLY") and ask for the results to be saved.
rag_results = run_ragas_evaluation(
    rag_chain=rag_system.rag_chain,
    experiment_name="prompt_engineering_optimization",
    save_results=True,
)


--LOADING EVALUATION DATA--
--EVALUATING LOCALLY--
--GETTING CONTEXT AND ANSWERS--


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

No statements were generated from the answer.


--EVALUATION COMPLETE--
--RESULTS SAVED--


In [33]:
# Per-question evaluation scores.
# NOTE(review): this cell's saved execution count (33) is out of sequence with
# the cells around it, and its table differs from the one in the next cell —
# it appears to be left over from a different run; confirm which is current.
rag_results

Unnamed: 0,question,contexts,answer,ground_truth,answer_correctness,faithfulness,answer_relevancy,context_precision
0,What is the significance of cherry trees in Wa...,[Washington (CNN)There should be plenty of pen...,"The cherry trees in Washington, D.C., are sign...",Cherry trees hold great significance in Washin...,0.712666,0.833333,0.983714,1.0
1,What is one of the events that Hillary Clinton...,"[(CNN)After a handful events in two months, Hi...",Hillary Clinton will be participating in a pai...,Hillary Clinton will be participating in the A...,0.998523,1.0,0.944596,0.95
2,What role did the United States-led coalition ...,[(CNN)In his first interview since Islamic Sta...,The United States-led coalition conducted airs...,The United States-led coalition played a role ...,0.618642,0.2,0.938077,1.0
3,What can viewers expect from the season finale...,[(CNN)Have Rick and his fellow survivors final...,Viewers can expect a 90-minute season finale f...,"The season finale of ""The Walking Dead"" is exp...",0.785931,1.0,0.973125,1.0
4,What is the significance of the Tunisian jihad...,"[Museum. Protesters held banners that said ""We...",The Tunisian jihadist's significance lies in h...,The significance of the Tunisian jihadist in r...,0.844,1.0,0.921761,1.0
5,How did the undercover FBI informant play a ro...,[(CNN)An Army National Guard member and his co...,The undercover FBI informant played a crucial ...,The undercover FBI informant played a role in ...,0.902158,1.0,0.852989,1.0
6,How does the new Red brand aim to leverage Rad...,"[Carlson Rezidor, which is hueing (sic) toward...",The new Red brand aims to leverage Radisson's ...,The new Red brand aims to leverage Radisson's ...,0.548405,1.0,0.915066,0.916667
7,When are the peak blooms expected during the N...,[Washington (CNN)There should be plenty of pen...,The peak blooms are expected between April 11 ...,The peak blooms are expected between April 11 ...,1.0,1.0,0.92351,1.0
8,What is Zhanna Nemtsova's opinion on the Russi...,"[of the case except what is in the media. ""I d...",Zhanna Nemtsova has no faith in the Russian in...,Zhanna Nemtsova said she has no faith in the R...,0.534439,1.0,0.0,1.0
9,Who was one of Putin's harshest critics?,[be heading an opposition party and do what I'...,One of Putin's harshest critics was Boris Nemt...,Boris Nemtsov,0.229628,1.0,1.0,1.0


In [11]:
# Per-question scores returned by the evaluation call: answer correctness,
# faithfulness, answer relevancy and context precision for each question.
rag_results

Unnamed: 0,question,contexts,answer,ground_truth,answer_correctness,faithfulness,answer_relevancy,context_precision
0,What is the significance of cherry trees in Wa...,[Washington (CNN)There should be plenty of pen...,"The cherry trees in Washington, D.C., are sign...",Cherry trees hold great significance in Washin...,0.775349,1.0,0.983752,1.0
1,What is one of the events that Hillary Clinton...,"[(CNN)After a handful events in two months, Hi...",Atlantic City for the American Camp Associatio...,Hillary Clinton will be participating in the A...,0.973927,1.0,0.900663,1.0
2,What role did the United States-led coalition ...,[(CNN)In his first interview since Islamic Sta...,The United States-led coalition conducted airs...,The United States-led coalition played a role ...,0.644465,1.0,0.898227,1.0
3,What can viewers expect from the season finale...,[(CNN)Have Rick and his fellow survivors final...,"Viewers can expect twists and turns, as well a...","The season finale of ""The Walking Dead"" is exp...",0.353687,1.0,0.919609,0.916667
4,What is the significance of the Tunisian jihad...,"[Museum. Protesters held banners that said ""We...",The Tunisian jihadist tweeted a pledge of alle...,The significance of the Tunisian jihadist in r...,0.646195,1.0,0.880502,1.0
5,How did the undercover FBI informant play a ro...,[(CNN)An Army National Guard member and his co...,The undercover FBI informant posed as an ISIS ...,The undercover FBI informant played a role in ...,0.782045,0.4,0.838849,1.0
6,How does the new Red brand aim to leverage Rad...,"[Carlson Rezidor, which is hueing (sic) toward...",By building on the Blu concept.,The new Red brand aims to leverage Radisson's ...,0.965545,1.0,0.900897,0.916667
7,When are the peak blooms expected during the N...,[Washington (CNN)There should be plenty of pen...,Between April 11 and April 14.,The peak blooms are expected between April 11 ...,0.826413,1.0,0.958552,1.0
8,What is Zhanna Nemtsova's opinion on the Russi...,"[of the case except what is in the media. ""I d...",She has no faith in the Russian investigation ...,Zhanna Nemtsova said she has no faith in the R...,0.734633,1.0,0.0,1.0
9,Who was one of Putin's harshest critics?,[be heading an opposition party and do what I'...,Boris Nemtsov,Boris Nemtsov,1.0,,0.855531,1.0
