### Import modules

In [1]:
import os
import pandas as pd

os.chdir("../../")

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

from dotenv import load_dotenv

In [2]:
from src.rag_pipeline import chunk_by_recursive_split, RAGSystem
from src.env_loader import load_api_keys
from src.ragas.ragas_pipeline import run_ragas_evaluation
from src import display_df

### Load API keys

In [3]:
openai_api_key = load_api_keys("OPENAI_API_KEY")

#### Initialize embeddings and RAG system

In [4]:
# embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
# embeddings=HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

embeddings_model = 'text-embedding-ada-002'
# embeddings_model = 'text-embedding-3-large'
embeddings = OpenAIEmbeddings(api_key=openai_api_key, model=embeddings_model)

# embeddings=FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")


In [8]:
rag_system = RAGSystem(
  model_name = "gpt-4o-mini",
  existing_vectorstore = False,
  embeddings = embeddings,
  clear_store = True,
  collection_name = "cnn_cs100_co_200",
  # use_multiquery = True,
)

In [9]:
rag_system.initialize()

--SETUP NEW VECTORSTORE--
--Split 1000 documents into 5030 chunks.--
--USING BASE RETRIEVER--
--SETUP RAG CHAIN--
--RAGCHAIN SETUP COMPLETE!--


#### Test the RAG Chain

In [10]:
question = "Who was one of Putin's harshest critics?"
result = rag_system.rag_chain.invoke(question)

In [11]:
result

{'question': "Who was one of Putin's harshest critics?",
 'answer': "Boris Nemtsov was one of Putin's harshest critics.",
 'contexts': ['be heading an opposition party and do what I\'m doing." Opinion: The complicated life and tragic death of Boris Nemtsov . Critics of Putin have in the past suffered miserable fates. Last year, a Moscow court sentenced five men to prison for the 2006 killing of Russian journalist and fierce Kremlin critic Anna Politkovskaya. Business magnate Mikhail Khodorkovsky accused Putin of corruption and spent 10 years in prison and labor camps. Late last year, Kremlin critic Alexey Navalny was found guilty of fraud in a politically charged trial. Russia\'s official news agency reported Monday that a request by Navalny to attend Nemtsov\'s funeral had been denied. And before his death, Nemtsov had been arrested several times for speaking against Putin\'s government. Kasparov, chairman of the Human Rights Foundation\'s International Council, suggested the killing 

## RAGAS Pipeline testing the rag_chain

### Ragas Testing with Langsmith Tracing

In [7]:
# experiment_name = "baseline_rag_benchmark_1"
# dataset_name = "cnn_dailymail_evaluation"

# rag_results = run_ragas_evaluation(
#   rag_chain=rag_system.rag_chain,
#   use_langsmith=True,
#   experiment_name=experiment_name,
#   dataset_name=dataset_name,
#   upload_dataset_to_langsmith=True,
#   save_results=True
# )

--LOADING EVALUATION DATA--
--GETTING CONTEXT AND ANSWERS--
--USING LANGSMITH FOR EVALUATION--
Created a new dataset 'cnn_dailymail_evaluation'. Dataset is accessible at https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
View the evaluation results for project 'baseline_rag_benchmark' at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6/compare?selectedSessions=a58cdd46-9bf6-44ae-9ea4-f0853631205f

View all tests for Dataset cnn_dailymail_evaluation at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
[------------>                                     ] 5/19

Error evaluating run f591f3a5-4864-48c3-ac91-409ab305f428 with EvaluatorChain: APIConnectionError('Connection error.')
Traceback (most recent call last):
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/openai/_base_client.py", line 1558, in _request
    response = await self._client.send(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1661, in send
    response = await self._send_handling_auth(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1689, in _send_handling_auth
    response = await self._send_handling_redirects(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1726, in _send_handling_redirects
    response =

[------------------------------------------------->] 19/19

Unnamed: 0,feedback.answer_correctness,feedback.faithfulness,feedback.answer_relevancy,feedback.context_precision,error,execution_time,run_id
count,19.0,18.0,18.0,18.0,0.0,19.0,19
unique,,,,,0.0,,19
top,,,,,,,31f949c4-1476-4eb2-ae11-f23eb62af6d3
freq,,,,,,,1
mean,0.706439,0.851852,0.887768,0.965509,,2.434766,
std,0.20325,0.24347,0.225174,0.083576,,0.693174,
min,0.229624,0.25,0.0,0.679167,,1.334236,
25%,0.579877,0.6875,0.918437,1.0,,2.05128,
50%,0.743723,1.0,0.934425,1.0,,2.481985,
75%,0.832633,1.0,0.963321,1.0,,2.726066,


--EVALUATION COMPLETE--


AttributeError: 'TestResult' object has no attribute 'to_pandas'

### Run Ragas tests locally

In [9]:
rag_results = run_ragas_evaluation(
  rag_chain=rag_system.rag_chain,
  save_results=True,
  experiment_name="embedding_model_bge_large"
)


--LOADING EVALUATION DATA--
--EVALUATING LOCALLY--
--GETTING CONTEXT AND ANSWERS--


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

--EVALUATION COMPLETE--
--RESULTS SAVED--
