### Import modules

In [1]:
import os
import pandas as pd

os.chdir("../../")

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader
# Reranker imports
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_huggingface import HuggingFaceEmbeddings

# Cohere reranker imports
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere


# from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_cohere import CohereEmbeddings

from dotenv import load_dotenv

In [2]:
from src.rag_pipeline import chunk_by_recursive_split, RAGSystem, Reranker
from src.env_loader import load_api_keys
from src.ragas.ragas_pipeline import run_ragas_evaluation
from src import display_df

### Load API keys

In [3]:
# Read the OpenAI key through the project's `load_api_keys` helper (src.env_loader).
# In the cells visible here the variable is only referenced by the commented-out
# OpenAIEmbeddings option in the next cell.
openai_api_key = load_api_keys("OPENAI_API_KEY")

#### Initialize embeddings and RAG system

In [4]:
# embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# embeddings_model = 'text-embedding-ada-002'
# embeddings_model = 'text-embedding-3-large'
# embeddings = OpenAIEmbeddings(api_key=openai_api_key, model=embeddings_model)

# embeddings=FastEmbedEmbeddings(model_name="BAAI/bge-large-en-v1.5")

# embeddings = CohereEmbeddings(model="embed-english-v3.0")

#### Reranking with CrossEncoderReranker

In [5]:
# Embedding model for the cross-encoder reranking experiment; the resulting
# object is handed to RAGSystem through its `embeddings=` argument below.
EMBEDDING_MODEL_NAME = "sentence-transformers/msmarco-distilbert-dot-v5"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

### Setup the RAG system

In [6]:
# Configure the RAG pipeline for this experiment.
# NOTE(review): option semantics are defined in src.rag_pipeline.RAGSystem, which
# is not visible here. Presumably `k_documents` is the number of candidates the
# base retriever returns and `top_n_ranked` the number kept after reranking — confirm.
rag_system = RAGSystem(
    model_name="gpt-3.5-turbo",
    existing_vectorstore=False,
    embeddings=embeddings,
    clear_store=True,
    k_documents=20,
    use_reranker=True,
    top_n_ranked=3,
)

In [7]:
# Build the pipeline. The saved log shows a new vector store being set up,
# documents being split into chunks, and the reranker being configured.
# NOTE(review): the meaning of the positional `50` is not visible from this
# notebook — confirm against RAGSystem.initialize and consider passing it by keyword.
# NOTE(review): the saved output ends with
# "AttributeError: 'Reranker' object has no attribute 'use_cohere_reranker'";
# that has to be fixed in src.rag_pipeline, it cannot be fixed from this cell.
rag_system.initialize(50)

--SETUP NEW VECTORSTORE--
--Split 1000 documents into 5030 chunks.--
--USING BASE RETRIEVER--
--SETUP RERANKER--


AttributeError: 'Reranker' object has no attribute 'use_cohere_reranker'

In [10]:
# Report how many chunks the splitter produced, then preview the first three.
# A notebook cell only renders its *last* expression, so the bare `len(...)`
# that used to sit on the first line was evaluated and silently discarded;
# printing it makes the count actually show up in the output.
print(len(rag_system.split_docs))
rag_system.split_docs[:3]

[Document(metadata={'source': 'cnn_dailymail', 'id': 'a4942dd663020ca54575471657a0af38d82897d6', 'start_index': 0}, page_content='(CNN)Share, and your gift will be multiplied. That may sound like an esoteric adage, but when Zully Broussard selflessly decided to give one of her kidneys to a stranger, her generosity paired up with big data. It resulted in six patients receiving transplants. That surprised and wowed her. "I thought I was going to help this one person who I don\'t know, but the fact that so many people can have a life extension, that\'s pretty big," Broussard told CNN affiliate KGO. She may feel guided in her generosity by a higher power. "Thanks for all the support and prayers," a comment on a Facebook page in her name read. "I know this entire journey is much bigger than all of us. I also know I\'m just the messenger." CNN cannot verify the authenticity of the page. But the power that multiplied Broussard\'s gift was data processing of genetic profiles from donor-recipie

In [15]:
# Keep a handle on the RAG system's base retriever; the compression retrievers
# built further down wrap it via `base_retriever=`.
base_retriever = rag_system.base_retriever

In [19]:
# Probe question about the kidney-donation article that appears in the first
# chunks displayed above; run it through the full RAG chain.
test_q = (
    "How did Zully Broussard's selfless decision to donate a kidney "
    "lead to six patients receiving transplants?"
)
res = rag_system.rag_chain.invoke(test_q)

In [23]:

# Wrap the base retriever with a cross-encoder reranker built from the
# BAAI/bge-reranker-base model; the compressor is configured with top_n=3.
model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=model, top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)


In [24]:

# Run the test question through the reranking retriever and show how many
# documents come back (the saved output shows 3, matching top_n=3 above).
compressed_docs = compression_retriever.invoke(test_q)
len(compressed_docs)

3

### Reranking with CohereRerank

In [33]:
# Re-bind the base retriever (same expression as in the cross-encoder section),
# presumably so this Cohere section can be run without the cells above it.
base_retriever = rag_system.base_retriever

In [34]:
# Cohere variant of the reranking retriever.
# `llm` is not used by any active cell here; it is only referenced by the
# commented-out `rag_chain_setup(compression_retriever, llm)` call further down.
llm = Cohere(temperature=0)
# NOTE(review): the saved evaluation run at the end of the notebook failed with a
# 429 from Cohere (trial key limited to 10 API calls / minute).
compressor = CohereRerank(model="rerank-english-v3.0")
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

In [36]:
# Point the RAG system at the compression (reranking) retriever and rebuild the chain.
# NOTE(review): presumably setup_rag_chain() reads `final_retriever` — confirm in
# src.rag_pipeline. This mutates `rag_system` in place, so later cells depend on
# this cell having been run.
rag_system.final_retriever = compression_retriever
rag_system.setup_rag_chain()

##### Using the Cohere LLM with the Cohere reranker retriever

In [37]:
# from src.rag_pipeline.rag_utils import rag_chain_setup

# rag_system.final_retriever = compression_retriever
# rag_system.rag_chain = rag_chain_setup(compression_retriever, llm)

#### Test the RAG Chain

In [38]:
# Smoke-test the rebuilt chain with a single question.
question = "Who was one of Putin's harshest critics?"
result = rag_system.rag_chain.invoke(question)

In [39]:
# Display the chain output; the saved output shows a dict with
# 'question', 'answer' and 'contexts' keys.
result

{'question': "Who was one of Putin's harshest critics?",
 'answer': "One of Putin's harshest critics was Boris Nemtsov.",
 'contexts': ['Moscow (CNN)In his first substantive comments since Kremlin critic Boris Nemtsov\'s death, Russian President Vladimir Putin on Wednesday called the killing a "disgrace" and lashed out at what he called "extremists" and protesters. Nemtsov had been one of Putin\'s harshest critics and had been arrested several times for speaking against the President\'s government. The 55-year-old opposition leader was gunned down Friday night in Moscow as he walked across a bridge about 100 meters (330 feet) from the Kremlin with his girlfriend, Ukrainian model Anna Duritskaya, 23. His slaying spurred thousands to rally in his honor in Moscow, with many calling him a true Russian patriot at his funeral Tuesday. Nemtsov isn\'t the first of Putin\'s critics to turn up dead, with others including Anna Politkovskaya (who was fatally shot) and Alexander Litvinenko (who was

## RAGAS pipeline: testing the rag_chain

### Ragas Testing with Langsmith Tracing

In [7]:
# experiment_name = "baseline_rag_benchmark_1"
# dataset_name = "cnn_dailymail_evaluation"

# rag_results = run_ragas_evaluation(
#   rag_chain=rag_system.rag_chain,
#   use_langsmith=True,
#   experiment_name=experiment_name,
#   dataset_name=dataset_name,
#   upload_dataset_to_langsmith=True,
#   save_results=True
# )

--LOADING EVALUATION DATA--
--GETTING CONTEXT AND ANSWERS--
--USING LANGSMITH FOR EVALUATION--
Created a new dataset 'cnn_dailymail_evaluation'. Dataset is accessible at https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
View the evaluation results for project 'baseline_rag_benchmark' at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6/compare?selectedSessions=a58cdd46-9bf6-44ae-9ea4-f0853631205f

View all tests for Dataset cnn_dailymail_evaluation at:
https://smith.langchain.com/o/6691a6dd-a70e-56c0-8f45-a1f64338d797/datasets/8e291ee7-635e-40c2-ab54-1d2e8897e5f6
[------------>                                     ] 5/19

Error evaluating run f591f3a5-4864-48c3-ac91-409ab305f428 with EvaluatorChain: APIConnectionError('Connection error.')
Traceback (most recent call last):
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/openai/_base_client.py", line 1558, in _request
    response = await self._client.send(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1661, in send
    response = await self._send_handling_auth(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1689, in _send_handling_auth
    response = await self._send_handling_redirects(
  File "/home/hilla/.cache/pypoetry/virtualenvs/rag-optimization-cnn-dailymail-hiPg4Kip-py3.10/lib/python3.10/site-packages/httpx/_client.py", line 1726, in _send_handling_redirects
    response =

[------------------------------------------------->] 19/19

Unnamed: 0,feedback.answer_correctness,feedback.faithfulness,feedback.answer_relevancy,feedback.context_precision,error,execution_time,run_id
count,19.0,18.0,18.0,18.0,0.0,19.0,19
unique,,,,,0.0,,19
top,,,,,,,31f949c4-1476-4eb2-ae11-f23eb62af6d3
freq,,,,,,,1
mean,0.706439,0.851852,0.887768,0.965509,,2.434766,
std,0.20325,0.24347,0.225174,0.083576,,0.693174,
min,0.229624,0.25,0.0,0.679167,,1.334236,
25%,0.579877,0.6875,0.918437,1.0,,2.05128,
50%,0.743723,1.0,0.934425,1.0,,2.481985,
75%,0.832633,1.0,0.963321,1.0,,2.726066,


--EVALUATION COMPLETE--


AttributeError: 'TestResult' object has no attribute 'to_pandas'

### Run Ragas tests locally

In [40]:
# Evaluate the current chain locally (no LangSmith arguments are passed) and save the results.
# NOTE(review): the experiment label mentions "gpt4o", but the RAGSystem cell above
# sets model_name="gpt-3.5-turbo" — confirm the label matches the model actually used.
rag_results = run_ragas_evaluation(
    rag_chain=rag_system.rag_chain,
    save_results=True,
    experiment_name="cohere_reranker_with_llm_openai_gpt4o",
)


--LOADING EVALUATION DATA--
--EVALUATING LOCALLY--
--GETTING CONTEXT AND ANSWERS--


TooManyRequestsError: status_code: 429, body: data=None message="You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions"