### Import modules

In [1]:
import os
import pandas as pd

# Switch to the directory two levels up so the `src` package imported in the
# next cell can be resolved. The starting directory is remembered the first
# time this cell runs, so re-running the cell in the same kernel lands in the
# same place instead of climbing two more levels each time.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, "../../"))

from datasets import load_dataset
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import HuggingFaceDatasetLoader
 # Reranker imports
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_huggingface import HuggingFaceEmbeddings

# Cohere reranker imports
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere


# from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_cohere import CohereEmbeddings

from dotenv import load_dotenv

In [2]:
from src.rag_pipeline import chunk_by_recursive_split, RAGSystem, Reranker
from src.env_loader import load_api_keys
from src.ragas.ragas_pipeline import run_ragas_evaluation
from src import display_df

### Load API keys

In [3]:
# Fetch the OpenAI key through the project helper rather than hardcoding it.
# NOTE(review): within this notebook the variable is only referenced by the
# commented-out OpenAIEmbeddings option; presumably load_api_keys also makes
# the key available to the "gpt-4o" model used by RAGSystem — confirm.
openai_api_key = load_api_keys("OPENAI_API_KEY")

#### Initialize embeddings (alternative backends, currently commented out)

In [4]:
# embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# embeddings_model = 'text-embedding-ada-002'
# embeddings_model = 'text-embedding-3-large'
# embeddings = OpenAIEmbeddings(api_key=openai_api_key, model=embeddings_model)

# embeddings=FastEmbedEmbeddings(model_name="BAAI/bge-large-en-v1.5")

# embeddings = CohereEmbeddings(model="embed-english-v3.0")

#### Reranking with CrossEncoderReranker

In [5]:
# Embedding model used for the cross-encoder reranking experiment.
EMBEDDING_MODEL_NAME = "sentence-transformers/msmarco-distilbert-dot-v5"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

### Set up the RAG system

In [10]:
# Configure the RAG system for the reranking experiment.
# NOTE(review): the per-argument notes are inferred from the parameter names
# and the initialization log — confirm against src.rag_pipeline.RAGSystem.
rag_system = RAGSystem(
    model_name="gpt-4o",
    existing_vectorstore=False,  # presumably: build a new vector store
    embeddings=embeddings,
    clear_store=True,
    k_documents=20,  # presumably: candidates retrieved before reranking
    use_reranker=True,
    top_n_ranked=5,  # presumably: documents kept after reranking
)

In [11]:
# Build the pipeline. Per the log printed by this call, it sets up a new
# vector store, splits the documents into chunks, sets up the base retriever
# and the reranker, and finally assembles the RAG chain.
rag_system.initialize()

--SETUP NEW VECTORSTORE--
--Split 1000 documents into 5030 chunks.--
--USING BASE RETRIEVER--
--SETUP RERANKER--
--USING OPEN SOURCE MODEL FOR RERANKING--
--SETUP RAG CHAIN--
--RAGCHAIN SETUP COMPLETE!--


In [8]:
# test_q = "How did Zully Broussard's selfless decision to donate a kidney lead to six patients receiving transplants?"
# res = rag_system.rag_chain.invoke(test_q)
# res

#### Test the RAG Chain

In [12]:
# Smoke test: send a single question through the RAG chain.
# `result` is inspected in the next cell.
question = "Who was one of Putin's harshest critics?"
result = rag_system.rag_chain.invoke(question)

In [13]:
# Show the chain's response; the output is a dict with 'question', 'answer'
# and 'contexts' keys.
result

{'question': "Who was one of Putin's harshest critics?",
 'answer': "Boris Nemtsov was one of Putin's harshest critics.",
 'contexts': ['Moscow (CNN)In his first substantive comments since Kremlin critic Boris Nemtsov\'s death, Russian President Vladimir Putin on Wednesday called the killing a "disgrace" and lashed out at what he called "extremists" and protesters. Nemtsov had been one of Putin\'s harshest critics and had been arrested several times for speaking against the President\'s government. The 55-year-old opposition leader was gunned down Friday night in Moscow as he walked across a bridge about 100 meters (330 feet) from the Kremlin with his girlfriend, Ukrainian model Anna Duritskaya, 23. His slaying spurred thousands to rally in his honor in Moscow, with many calling him a true Russian patriot at his funeral Tuesday. Nemtsov isn\'t the first of Putin\'s critics to turn up dead, with others including Anna Politkovskaya (who was fatally shot) and Alexander Litvinenko (who was

## RAGAS pipeline: evaluating the rag_chain

### Run the RAGAS evaluation locally

In [14]:
# Evaluate the RAG chain with the project's RAGAS pipeline and save the
# results. The experiment name spells "msmarco" the same way as the embedding
# model id ("sentence-transformers/msmarco-distilbert-dot-v5") so saved
# results are labeled with the model that was actually used.
# NOTE(review): presumably the experiment name identifies the saved results —
# confirm nothing else looks them up under a different spelling.
rag_results = run_ragas_evaluation(
    rag_chain=rag_system.rag_chain,
    save_results=True,
    experiment_name="reranker_opensource_model_msmarco_distilbert",
)


--LOADING EVALUATION DATA--
--EVALUATING LOCALLY--
--GETTING CONTEXT AND ANSWERS--


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

--EVALUATION COMPLETE--
--RESULTS SAVED--
